diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 228 |
1 files changed, 160 insertions, 68 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3b24951d1dc9..aa819eeb30a2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -51,9 +51,11 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", -cl::desc("enable preincrement load/store generation on PPC (experimental)"), - cl::Hidden); +static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", +cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); + +static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", +cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) @@ -64,6 +66,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { + const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); setPow2DivIsCheap(); @@ -73,12 +76,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - setMinStackArgumentAlignment(TM.getSubtarget<PPCSubtarget>().isPPC64() ? 8:4); + bool isPPC64 = Subtarget->isPPC64(); + setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. - addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); - addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); - addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); + addRegisterClass(MVT::i32, &PPC::GPRCRegClass); + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); // PowerPC has an i16 but no i8 (or i1) SEXTLOAD setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -130,17 +134,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FMA , MVT::f64, Expand); + setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FMA , MVT::f32, Expand); + setOperationAction(ISD::FMA , MVT::f32, Legal); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { + if (!Subtarget->hasFSQRT()) { setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSQRT, MVT::f32, Expand); } @@ -226,8 +230,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (Subtarget->isSVR4ABI()) { + if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); @@ -271,7 +275,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { + if (Subtarget->has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -290,9 +294,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } - if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) { + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly - addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); + addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // 64-bit PowerPC wants to expand i128 shifts itself. @@ -306,7 +310,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { + if (Subtarget->hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -370,12 +374,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i32, Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); - addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); - addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); + addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); + addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); + addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); + setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -389,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + if (Subtarget->has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -398,7 +405,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + if (isPPC64) { setStackPointerRegisterToSaveRestore(PPC::X1); setExceptionPointerRegister(PPC::X3); setExceptionSelectorRegister(PPC::X4); @@ -415,7 +422,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::BSWAP); // Darwin long double math library functions have $LDBL128 appended. - if (TM.getSubtarget<PPCSubtarget>().isDarwin()) { + if (Subtarget->isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); @@ -432,6 +439,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (PPCSubTarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget->isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + setSupportJumpTables(false); + setInsertFencesForAtomic(true); setSchedulingPreference(Sched::Hybrid); @@ -902,10 +914,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() + assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || + Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); @@ -1006,7 +1019,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, if (N.getOpcode() == ISD::ADD) { short imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { - Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); + Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); } else { @@ -1015,7 +1028,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). - assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() + assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || @@ -1084,8 +1097,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { - // Disabled by default for now. - if (!EnablePPCPreinc) return false; + if (DisablePPCPreinc) return false; SDValue Ptr; EVT VT; @@ -1103,7 +1115,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - // TODO: Check reg+reg first. + if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } // LDU/STU use reg+imm*4, others use reg+imm. if (VT != MVT::i64) { @@ -1222,6 +1237,30 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); } +SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + EVT PtrVT = getPointerTy(); + bool is64bit = PPCSubTarget.isPPC64(); + + TLSModel::Model model = getTargetMachine().getTLSModel(GV); + + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_HA); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_LO); + + if (model != TLSModel::LocalExec) + llvm_unreachable("only local-exec TLS mode supported"); + SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, + is64bit ? MVT::i64 : MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); + return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); +} + SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); @@ -1440,13 +1479,16 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) - std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, + TargetLowering::CallLoweringInfo CLI(Chain, + Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, + CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, + /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -1702,7 +1744,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); @@ -1721,19 +1763,19 @@ PPCTargetLowering::LowerFormalArguments_SVR4( default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i32: - RC = PPC::GPRCRegisterClass; + RC = &PPC::GPRCRegClass; break; case MVT::f32: - RC = PPC::F4RCRegisterClass; + RC = &PPC::F4RCRegClass; break; case MVT::f64: - RC = PPC::F8RCRegisterClass; + RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v4f32: - RC = PPC::VRRCRegisterClass; + RC = &PPC::VRRCRegClass; break; } @@ -1763,7 +1805,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // caller's stack frame, right above the parameter list area. SmallVector<CCValAssign, 16> ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + getTargetMachine(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -2743,7 +2785,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. @@ -2800,7 +2842,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); for (unsigned i = 0; i != RVLocs.size(); ++i) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); @@ -2864,14 +2906,19 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, } SDValue -PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool doesNotRet, bool &isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, +PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; + SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool isVarArg = CLI.IsVarArg; + if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); @@ -2921,7 +2968,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); @@ -2961,7 +3008,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // Assign locations to all of the outgoing aggregate by value arguments. SmallVector<CCValAssign, 16> ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ByValArgLocs, *DAG.getContext()); + getTargetMachine(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); @@ -3485,7 +3532,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); // If this is the first return lowered for this function, add the regs to the @@ -4559,7 +4606,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: llvm_unreachable("TLS not implemented for PPC"); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); @@ -4899,11 +4946,37 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (MI->getOpcode() == PPC::SELECT_CC_I4 || - MI->getOpcode() == PPC::SELECT_CC_I8 || - MI->getOpcode() == PPC::SELECT_CC_F4 || - MI->getOpcode() == PPC::SELECT_CC_F8 || - MI->getOpcode() == PPC::SELECT_CC_VRRC) { + if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8)) { + unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ? + PPC::ISEL8 : PPC::ISEL; + unsigned SelectPred = MI->getOperand(4).getImm(); + DebugLoc dl = MI->getDebugLoc(); + + // The SelectPred is ((BI << 5) | BO) for a BCC + unsigned BO = SelectPred & 0xF; + assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); + + unsigned TrueOpNo, FalseOpNo; + if (BO == 12) { + TrueOpNo = 2; + FalseOpNo = 3; + } else { + TrueOpNo = 3; + FalseOpNo = 2; + SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + } + + BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(TrueOpNo).getReg()) + .addReg(MI->getOperand(FalseOpNo).getReg()) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || + MI->getOpcode() == PPC::SELECT_CC_I8 || + MI->getOpcode() == PPC::SELECT_CC_F4 || + MI->getOpcode() == PPC::SELECT_CC_F8 || + MI->getOpcode() == PPC::SELECT_CC_VRRC) { + // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to @@ -5612,18 +5685,18 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'b': // R1-R31 case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) - return std::make_pair(0U, PPC::G8RCRegisterClass); - return std::make_pair(0U, PPC::GPRCRegisterClass); + return std::make_pair(0U, &PPC::G8RCRegClass); + return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': if (VT == MVT::f32) - return std::make_pair(0U, PPC::F4RCRegisterClass); - else if (VT == MVT::f64) - return std::make_pair(0U, PPC::F8RCRegisterClass); + return std::make_pair(0U, &PPC::F4RCRegClass); + if (VT == MVT::f64) + return std::make_pair(0U, &PPC::F8RCRegClass); break; case 'v': - return std::make_pair(0U, PPC::VRRCRegisterClass); + return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc - return std::make_pair(0U, PPC::CRRCRegisterClass); + return std::make_pair(0U, &PPC::CRRCRegClass); } } @@ -5839,11 +5912,30 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than +/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to +/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd +/// is expanded to mul + add. +bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + case MVT::v4f32: + return true; + default: + break; + } + + return false; +} + Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - unsigned Directive = PPCSubTarget.getDarwinDirective(); - if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) - return Sched::ILP; + if (DisableILPPref) + return TargetLowering::getSchedulingPreference(N); - return TargetLowering::getSchedulingPreference(N); + return Sched::ILP; } |