diff options
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- | lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 298 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 110 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.h | 8 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstr64Bit.td | 12 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.h | 1 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrInfo.td | 5 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCInstrVSX.td | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCTargetMachine.cpp | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCTargetMachine.h | 4 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 20 | ||||
-rw-r--r-- | lib/Target/PowerPC/PPCTargetTransformInfo.h | 1 |
12 files changed, 432 insertions, 39 deletions
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 5fa7b2c6bfb1..54414457388d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -77,6 +77,11 @@ STATISTIC(SignExtensionsAdded, "Number of sign extensions for compare inputs added."); STATISTIC(ZeroExtensionsAdded, "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); +STATISTIC(OmittedForNonExtendUses, + "Number of compares not eliminated as they have non-extending uses."); + // FIXME: Remove this once the bug has been fixed! cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -275,6 +280,8 @@ private: bool trySETCC(SDNode *N); bool tryEXTEND(SDNode *N); + bool tryLogicOpOfCompares(SDNode *N); + SDValue computeLogicOpInGPR(SDValue LogicOp); SDValue signExtendInputIfNeeded(SDValue Input); SDValue zeroExtendInputIfNeeded(SDValue Input); SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); @@ -282,6 +289,10 @@ private: int64_t RHSValue, SDLoc dl); SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, int64_t RHSValue, SDLoc dl); + SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); void PeepholePPC64(); @@ -2501,6 +2512,11 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } +// Is this opcode a bitwise logical operation? +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} + /// If this node is a sign/zero extension of an integer comparison, /// it can usually be computed in GPR's rather than using comparison /// instructions and ISEL. We only do this on 64-bit targets for now @@ -2513,13 +2529,20 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { N->getOpcode() == ISD::SIGN_EXTEND) && "Expecting a zero/sign extend node!"); - if (N->getOperand(0).getOpcode() != ISD::SETCC) + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. + if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = computeLogicOpInGPR(N->getOperand(0)); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) return false; - - SDValue WideRes = - getSETCCInGPR(N->getOperand(0), - N->getOpcode() == ISD::SIGN_EXTEND ? - SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); if (!WideRes) return false; @@ -2540,6 +2563,159 @@ bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { return true; } +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). +SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue LHS, RHS; + + // Special case: xor %a, -1 + bool IsBitwiseNegation = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. + // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return computeLogicOpInGPR(Operand); + return SDValue(); + }; + LHS = getLogicOperand(LogicOp.getOperand(0)); + RHS = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!LHS || (!RHS && !IsBitwiseNegation)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values. + if (LHS.getValueType() == MVT::i32) + LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); + if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) + RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = PPC::AND8; break; + case ISD::OR: NewOpc = PPC::OR8; break; + case ISD::XOR: NewOpc = PPC::XOR8; break; + } + + if (IsBitwiseNegation) { + RHS = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); + +} + +/// Try performing logical operations on results of comparisons in GPRs. +/// It is typically preferred from a performance perspective over performing +/// the operations on individual bits in the CR. We only do this on 64-bit +/// targets for now as the code is specialized for 64-bit (it uses 64-bit +/// instructions and assumes 64-bit registers). +bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); + if (!LoweredLogical) + return false; + + SDLoc dl(N); + bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; + unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue LHS = LoweredLogical.getOperand(0); + SDValue RHS = LoweredLogical.getOperand(1); + SDValue WideOp; + SDValue OpToConvToRecForm; + + // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode + // that is input to the XORI. + if (IsBitwiseNegate && + LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) + OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); + else if (IsBitwiseNegate) + // If the input to the XORI isn't an extension, that's what we're after. + OpToConvToRecForm = LoweredLogical.getOperand(0); + else + // If this is not an XORI, it is a reg-reg logical op and we can convert it + // to record-form. + OpToConvToRecForm = LoweredLogical; + + // Get the record-form version of the node we're looking to use to get the + // CR result from. + uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); + int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); + + // Convert the right node to record-form. This is either the logical we're + // looking at or it is the input node to the negation (if we're looking at + // a bitwise negation). + if (NewOpc != -1 && IsBitwiseNegate) { + // The input to the XORI has a record-form. Use it. + assert(LoweredLogical.getConstantOperandVal(1) == 1 && + "Expected a PPC::XORI8 only for bitwise negation."); + // Emit the record-form instruction. + std::vector<SDValue> Ops; + for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) + Ops.push_back(OpToConvToRecForm.getOperand(i)); + + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, + OpToConvToRecForm.getValueType(), + MVT::Glue, Ops), 0); + } else { + assert((NewOpc != -1 || !IsBitwiseNegate) && + "No record form available for AND8/OR8/XOR8?"); + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, + MVT::i64, MVT::Glue, LHS, RHS), 0); + } + + // Select this node to a single bit from CR0 set by the record-form node + // just created. For bitwise negation, use the EQ bit which is the equivalent + // of negating the result (i.e. it is a bit set when the result of the + // operation is zero). + SDValue SRIdxVal = + CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); + SDValue CRBit = + SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + ReplaceNode(N, CRBit.getNode()); + return true; +} + /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. /// Useful when emitting comparison code for 32-bit values without using /// the compare instruction (which only considers the lower 32-bits). @@ -2677,6 +2853,77 @@ SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, } } +/// Produces a zero-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) + // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, + getI64Imm(58, dl), getI64Imm(63, dl)), + 0); + } + } +} + +/// Produces a sign-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue AddInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Addic = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + AddInput, getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, + Addic, Addic.getValue(1)), 0); + } + } +} + +/// Does this SDValue have any uses for which keeping the value in a GPR is +/// appropriate. This is meant to be used on values that have type i1 since +/// it is somewhat meaningless to ask if values of other types can be kept in +/// GPR's. +static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { + assert(Compare.getOpcode() == ISD::SETCC && + "An ISD::SETCC node required here."); + + // For values that have a single use, the caller should obviously already have + // checked if that use is an extending use. We check the other uses here. + if (Compare.hasOneUse()) + return true; + // We want the value in a GPR if it is being extended, used for a select, or + // used in logical operations. + for (auto CompareUse : Compare.getNode()->uses()) + if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && + CompareUse->getOpcode() != ISD::ZERO_EXTEND && + CompareUse->getOpcode() != ISD::SELECT && + !isLogicOp(CompareUse->getOpcode())) { + OmittedForNonExtendUses++; + return false; + } + return true; +} + /// Returns an equivalent of a SETCC node but with the result the same width as /// the inputs. This can nalso be used for SELECT_CC if either the true or false /// values is a power of two while the other is zero. @@ -2686,6 +2933,11 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, Compare.getOpcode() == ISD::SELECT_CC) && "An ISD::SETCC node required here."); + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) + return SDValue(); + SDValue LHS = Compare.getOperand(0); SDValue RHS = Compare.getOperand(1); @@ -2694,30 +2946,35 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, ISD::CondCode CC = cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); EVT InputVT = LHS.getValueType(); - if (InputVT != MVT::i32) + if (InputVT != MVT::i32 && InputVT != MVT::i64) return SDValue(); - SDLoc dl(Compare); - ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); - int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; - if (ConvOpts == SetccInGPROpts::ZExtInvert || ConvOpts == SetccInGPROpts::SExtInvert) CC = ISD::getSetCCInverse(CC, true); - if (ISD::isSignedIntSetCC(CC)) { + bool Inputs32Bit = InputVT == MVT::i32; + if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { LHS = signExtendInputIfNeeded(LHS); RHS = signExtendInputIfNeeded(RHS); - } else if (ISD::isUnsignedIntSetCC(CC)) { + } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { LHS = zeroExtendInputIfNeeded(LHS); RHS = zeroExtendInputIfNeeded(RHS); } + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); + int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || ConvOpts == SetccInGPROpts::SExtInvert; - if (IsSext) + + if (IsSext && Inputs32Bit) return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); - return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); } void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { @@ -2906,6 +3163,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::AND: { + if (tryLogicOpOfCompares(N)) + return; + unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; @@ -3025,6 +3285,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitfieldInsert(N)) return; + if (tryLogicOpOfCompares(N)) + return; + short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { @@ -3042,6 +3305,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::XOR: { + if (tryLogicOpOfCompares(N)) + return; + break; + } case ISD::ADD: { short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 216efcc4a1ee..41ff9d903aa0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1041,6 +1041,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemset = 128; MaxStoresPerMemcpy = 128; MaxStoresPerMemmove = 128; + MaxLoadsPerMemcmp = 128; + } else { + MaxLoadsPerMemcmp = 8; + MaxLoadsPerMemcmpOptSize = 4; } } @@ -1112,6 +1116,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; @@ -1593,17 +1598,25 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } - // Check that the mask is shuffling words -static bool isWordShuffleMask(ShuffleVectorSDNode *N) { - for (unsigned i = 0; i < 4; ++i) { - unsigned B0 = N->getMaskElt(i*4); - unsigned B1 = N->getMaskElt(i*4+1); - unsigned B2 = N->getMaskElt(i*4+2); - unsigned B3 = N->getMaskElt(i*4+3); - if (B0 % 4) - return false; - if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) +// Check that the mask is shuffling N byte elements. +static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) { + assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && + "Unexpected element width."); + + unsigned NumOfElem = 16 / Width; + unsigned MaskVal[16]; // Width is never greater than 16 + for (unsigned i = 0; i < NumOfElem; ++i) { + MaskVal[0] = N->getMaskElt(i * Width); + if (MaskVal[0] % Width) { return false; + } + + for (unsigned int j = 1; j < Width; ++j) { + MaskVal[j] = N->getMaskElt(i * Width + j); + if (MaskVal[j] != MaskVal[j-1] + 1) { + return false; + } + } } return true; @@ -1611,7 +1624,7 @@ static bool isWordShuffleMask(ShuffleVectorSDNode *N) { bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { - if (!isWordShuffleMask(N)) + if (!isNByteElemShuffleMask(N, 4)) return false; // Now we look at mask elements 0,4,8,12 @@ -1688,7 +1701,7 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the word is consecutive. - if (!isWordShuffleMask(N)) + if (!isNByteElemShuffleMask(N, 4)) return false; // Now we look at mask elements 0,4,8,12, which are the beginning of words. @@ -1746,6 +1759,66 @@ bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, } } +/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap +/// if the inputs to the instruction should be swapped and set \p DM to the +/// value for the immediate. +/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI +/// AND element 0 of the result comes from the first input (LE) or second input +/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. +/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle +/// mask. +bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + // Ensure each byte index of the double word is consecutive. + if (!isNByteElemShuffleMask(N, 8)) + return false; + + unsigned M0 = N->getMaskElt(0) / 8; + unsigned M1 = N->getMaskElt(8) / 8; + assert(((M0 | M1) < 4) && "A mask element out of bounds?"); + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + if ((M0 | M1) < 2) { + DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); + Swap = false; + return true; + } else + return false; + } + + if (IsLE) { + if (M0 > 1 && M1 < 2) { + Swap = false; + } else if (M0 < 2 && M1 > 1) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (((~M1) & 1) << 1) + ((~M0) & 1); + return true; + } else { // BE + if (M0 < 2 && M1 > 1) { + Swap = false; + } else if (M0 > 1 && M1 < 2) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (M0 << 1) + (M1 & 1); + return true; + } +} + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. @@ -7760,6 +7833,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); } + if (Subtarget.hasVSX() && + PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2); + + SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2f9eb95f6de6..7982a4a9e9fb 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -90,6 +90,10 @@ namespace llvm { /// VECSHL, + /// XXPERMDI - The PPC XXPERMDI instruction + /// + XXPERMDI, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -454,6 +458,10 @@ namespace llvm { /// for a XXSLDWI instruction. bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE); + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXPERMDI instruction. + bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 165970f9678c..295590b2acf6 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -735,12 +735,12 @@ def RLDICL_32_64 : MDForm_1<30, 0, "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; // End fast-isel. -let isCodeGenOnly = 1 in -def RLDICL_32 : MDForm_1<30, 0, - (outs gprc:$rA), - (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, - []>, isPPC64; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLDICL_32 : MDForm_1r<30, 0, + (outs gprc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index fd6785e963a6..f3c68c443b1b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1983,3 +1983,7 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { return &PPC::VSRCRegClass; return RC; } + +int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { + return PPC::getRecordFormOpcode(Opcode); +} diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index b30d09e03ec4..8dd4dbb60879 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -290,6 +290,7 @@ public: return Reg >= PPC::V0 && Reg <= PPC::V31; } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; + static int getRecordFormOpcode(unsigned Opcode); }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 26b99eced23c..8223aa655e38 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,10 @@ def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -170,6 +174,7 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 1589ab03e507..c4139ca8b7bd 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -843,7 +843,9 @@ let Uses = [RM] in { def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), - "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, + [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, + imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ddae5befee3e..b9004cc8a9f5 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -296,7 +296,7 @@ namespace { /// PPC Code Generator Pass Configuration Options. class PPCPassConfig : public TargetPassConfig { public: - PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) + PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} PPCTargetMachine &getPPCTargetMachine() const { @@ -316,7 +316,7 @@ public: } // end anonymous namespace TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { - return new PPCPassConfig(this, PM); + return new PPCPassConfig(*this, PM); } void PPCPassConfig::addIRPasses() { diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index f2838351cee5..b8f5a2083d80 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -55,6 +55,10 @@ public: const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; + + bool isMachineVerifierClean() const override { + return false; + } }; /// PowerPC 32-bit target machine. diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 7ee1317bf72f..5559cdc5fe46 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -215,6 +215,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { return LoopHasReductions; } +bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { + MaxLoadSize = 8; + return true; +} + bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } @@ -239,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned PPCTTIImpl::getCacheLineSize() { - // This is currently only used for the data prefetch pass which is only - // enabled for BG/Q by default. - return CacheLineSize; + // Check first if the user specified a custom line size. + if (CacheLineSize.getNumOccurrences() > 0) + return CacheLineSize; + + // On P7, P8 or P9 we have a cache line size of 128. + unsigned Directive = ST->getDarwinDirective(); + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) + return 128; + + // On other processors return a default of 64 bytes. + return 64; } unsigned PPCTTIImpl::getPrefetchDistance() { diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 6ce70fbd8778..2e0116fee04c 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -60,6 +60,7 @@ public: /// @{ bool enableAggressiveInterleaving(bool LoopHasReductions); + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); |