Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1357
1 file changed, 869 insertions, 488 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8cf6a660b08b..60ed72e1018b 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -52,6 +52,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" @@ -66,6 +67,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -119,6 +121,9 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision", cl::desc("enable quad precision float support on ppc"), cl::Hidden); +static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables", +cl::desc("use absolute jump tables on ppc"), cl::Hidden); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); @@ -132,10 +137,6 @@ extern cl::opt<bool> ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - // Use _setjmp/_longjmp instead of setjmp/longjmp. - setUseUnderscoreSetJmp(true); - setUseUnderscoreLongJmp(true); - // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); @@ -389,6 +390,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::LRINT, MVT::f64, Legal); + setOperationAction(ISD::LRINT, MVT::f32, Legal); + setOperationAction(ISD::LLRINT, MVT::f64, Legal); + setOperationAction(ISD::LLRINT, MVT::f32, Legal); + setOperationAction(ISD::LROUND, MVT::f64, Legal); + setOperationAction(ISD::LROUND, MVT::f32, Legal); + setOperationAction(ISD::LLROUND, MVT::f64, Legal); + setOperationAction(ISD::LLROUND, MVT::f32, Legal); + } } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); @@ -548,6 +559,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); + } + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. @@ -702,6 +720,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (!Subtarget.hasP8Altivec()) setOperationAction(ISD::ABS, MVT::v2i64, Expand); + // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w). + if (Subtarget.hasAltivec()) + for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8}) + setOperationAction(ISD::ROTL, VT, Legal); + // With hasP8Altivec set, we can lower ISD::ROTL to vrld. 
+ if (Subtarget.hasP8Altivec()) + setOperationAction(ISD::ROTL, MVT::v2i64, Legal); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); @@ -756,13 +782,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + // The nearbyint variants are not allowed to raise the inexact exception, + // so we can only code-gen them with unsafe math. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + } + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); @@ -910,12 +946,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FREM, MVT::f128, Expand); } setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); - + setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); + setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); + setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); + setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); } if (Subtarget.hasP9Altivec()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } } @@ -1183,7 +1230,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.isDarwin()) setPrefFunctionAlignment(Align(16)); - switch (Subtarget.getDarwinDirective()) { + switch (Subtarget.getCPUDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_A2: @@ -1198,6 +1245,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); break; @@ -1212,15 +1260,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
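For context, the MaxStoresPer* knobs tuned in the hunk that follows cap how many scalar stores SelectionDAG emits when expanding memcpy/memset/memmove inline rather than calling the library routine. A hedged illustration of the effect (the byte counts follow from the thresholds; the calls themselves are illustrative):

    // With MaxStoresPerMemcpy = 32 on e500mc/e5500:
    //   memcpy(dst, src, 128);  // 128 bytes = 32 word stores on PPC32:
    //                           // expanded inline as loads/stores
    //   memcpy(dst, src, 256);  // above the threshold: stays a libcall
    // The *OptSize variants apply when the function is optimized for size.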
- if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || - Subtarget.getDarwinDirective() == PPC::DIR_E5500) { + if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || + Subtarget.getCPUDirective() == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; - } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) { + } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { // The A2 also benefits from (very) aggressive inlining of memcpy and // friends. The overhead of the function call, even when warm, can be // over one hundred cycles. @@ -1294,6 +1342,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; + case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; + case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; @@ -1314,7 +1364,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; - case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; @@ -1345,8 +1394,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; - case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; - case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; + case PPCISD::ANDI_rec_1_EQ_BIT: + return "PPCISD::ANDI_rec_1_EQ_BIT"; + case PPCISD::ANDI_rec_1_GT_BIT: + return "PPCISD::ANDI_rec_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; case PPCISD::LBRX: return "PPCISD::LBRX"; @@ -2699,9 +2750,9 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); const Constant *C = CP->getConstVal(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC.
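For reference, the getTOCEntry helper called in the hunk below is defined earlier in this file and wraps the target address in a PPCISD::TOC_ENTRY node. A minimal sketch of the idea, assuming the usual SelectionDAG helpers; the real helper also attaches a machine memory operand for the TOC load, and the sketch's name is hypothetical:

    // Materialize an address via the TOC: the node later selects to a
    // TOC-relative load, with X2 (64-bit) or R2 (32-bit AIX) as the base.
    static SDValue getTOCEntrySketch(SelectionDAG &DAG, const SDLoc &dl,
                                     SDValue GA, bool Is64Bit) {
      EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
      SDValue TOCBase = DAG.getRegister(Is64Bit ? PPC::X2 : PPC::R2, VT);
      SDValue Ops[] = {GA, TOCBase};
      return DAG.getNode(PPCISD::TOC_ENTRY, dl,
                         DAG.getVTList(VT, MVT::Other), Ops);
    }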
- if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return getTOCEntry(DAG, SDLoc(CP), GA); @@ -2735,14 +2786,16 @@ unsigned PPCTargetLowering::getJumpTableEncoding() const { } bool PPCTargetLowering::isJumpTableRelative() const { - if (Subtarget.isPPC64()) + if (UseAbsoluteJumpTables) + return false; + if (Subtarget.isPPC64() || Subtarget.isAIXABI()) return true; return TargetLowering::isJumpTableRelative(); } SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { - if (!Subtarget.isPPC64()) + if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBase(Table, DAG); switch (getTargetMachine().getCodeModel()) { @@ -2759,7 +2812,7 @@ const MCExpr * PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { - if (!Subtarget.isPPC64()) + if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); switch (getTargetMachine().getCodeModel()) { @@ -2775,9 +2828,9 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), GA); @@ -2804,9 +2857,9 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); const BlockAddress *BA = BASDN->getBlockAddress(); - // 64-bit SVR4 ABI code is always position-independent. + // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual BlockAddress is stored in the TOC. 
- if (Subtarget.is64BitELFABI()) { + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), GA); @@ -3129,11 +3182,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); + return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget.isAIXABI()) + report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); + SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -3394,15 +3453,16 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + if (Subtarget.isAIXABI()) + return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - else if (Subtarget.is32BitELFABI()) + if (Subtarget.is32BitELFABI()) return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - // FIXME: We are using this for both AIX and Darwin. We should add appropriate - // AIX testing, and rename it appropriately. return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -4934,213 +4994,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) { return false; } -static unsigned -PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, - SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, - bool isPatchPoint, bool hasNest, - SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, - SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, - ImmutableCallSite CS, const PPCSubtarget &Subtarget) { - bool isPPC64 = Subtarget.isPPC64(); - bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); - bool isAIXABI = Subtarget.isAIXABI(); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - - unsigned CallOpc = PPCISD::CALL; - - bool needIndirectCall = true; - if (!isSVR4ABI || !isPPC64) - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } - - // PC-relative references to external symbols should go through $stub, unless - // we're building with the leopard linker or later, which automatically - // synthesizes these stubs. 
- const TargetMachine &TM = DAG.getTarget(); - const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); - const GlobalValue *GV = nullptr; - if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) - GV = G->getGlobal(); - bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); - bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. - if (isFunctionGlobalAddress(Callee)) { - GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); - - // A call to a TLS address is actually an indirect call to a - // thread-specific pointer. - unsigned OpFlags = 0; - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), 0, OpFlags); - needIndirectCall = false; - } - - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - unsigned char OpFlags = 0; - - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), - OpFlags); - needIndirectCall = false; - } - - if (isPatchPoint) { - // We'll form an invalid direct call when lowering a patchpoint; the full - // sequence for an indirect call is complicated, and many of the - // instructions introduced might have side effects (and, thus, can't be - // removed later). The call itself will be removed as soon as the - // argument/return lowering is complete, so the fact that it has the wrong - // kind of operands should not really matter. - needIndirectCall = false; - } - - if (needIndirectCall) { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - - if (is64BitELFv1ABI) { - // Function pointers in the 64-bit SVR4 ABI do not point to the function - // entry point, but to the function descriptor (the function entry point - // address is part of the function descriptor though). - // The function descriptor is a three doubleword structure with the - // following fields: function entry point, TOC base address and - // environment pointer. - // Thus for a call through a function pointer, the following actions need - // to be performed: - // 1. Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). - // 2. Load the address of the function entry point from the function - // descriptor. - // 3. Load the TOC of the callee from the function descriptor into r2. - // 4. Load the environment pointer from the function descriptor into - // r11. - // 5. Branch to the function entry point address. - // 6. On return of the callee, the TOC of the caller needs to be - // restored (this is done in FinishCall()). - // - // The loads are scheduled at the beginning of the call sequence, and the - // register copies are flagged together to ensure that no other - // operations can be scheduled in between. E.g. without flagging the - // copies together, a TOC access in the caller could be scheduled between - // the assignment of the callee TOC and the branch to the callee, which - // results in the TOC access going through the TOC of the callee instead - // of going through the TOC of the caller, which leads to incorrect code. - - // Load the address of the function entry point from the function - // descriptor. 
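The descriptor that the numbered steps above walk through can be pictured as follows; this is a sketch only, since the layout is dictated by the 64-bit ELFv1 ABI (AIX uses the same shape), not by this file:

    // What a 64-bit function pointer points to under a descriptor ABI:
    struct FunctionDescriptor {
      uint64_t EntryPoint; // +0:  address of the function's first instruction
      uint64_t TOCBase;    // +8:  callee's TOC base, copied into r2
      uint64_t EnvPtr;     // +16: environment pointer, copied into r11
    };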
- SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); - if (LDChain.getValueType() == MVT::Glue) - LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); - - auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() - ? (MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant) - : MachineMemOperand::MONone; - - MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); - SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, - /* Alignment = */ 8, MMOFlags); - - // Load environment pointer into r11. - SDValue PtrOff = DAG.getIntPtrConstant(16, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); - SDValue LoadEnvPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), - /* Alignment = */ 8, MMOFlags); - - SDValue TOCOff = DAG.getIntPtrConstant(8, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); - SDValue TOCPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), - /* Alignment = */ 8, MMOFlags); - - setUsesTOCBasePtr(DAG); - SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, - InFlag); - Chain = TOCVal.getValue(0); - InFlag = TOCVal.getValue(1); - - // If the function call has an explicit 'nest' parameter, it takes the - // place of the environment pointer. - if (!hasNest) { - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); - - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); - } - - MTCTROps[0] = Chain; - MTCTROps[1] = LoadFuncPtr; - MTCTROps[2] = InFlag; - } - - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, - makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); - InFlag = Chain.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Glue); - Ops.push_back(Chain); - CallOpc = PPCISD::BCTRL; - Callee.setNode(nullptr); - // Add use of X11 (holding environment pointer) - if (is64BitELFv1ABI && !hasNest) - Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); - } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register - // live into the call. - // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT. - if ((isSVR4ABI && isPPC64) || isAIXABI) { - setUsesTOCBasePtr(DAG); - - // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is - // no way to mark dependencies as implicit here. - // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. - if (!isPatchPoint) - Ops.push_back(DAG.getRegister(isPPC64 ? 
PPC::X2 - : PPC::R2, PtrVT)); - } - - return CallOpc; -} - SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, @@ -5205,30 +5058,357 @@ SDValue PPCTargetLowering::LowerCallResult( return Chain; } -SDValue PPCTargetLowering::FinishCall( - CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, - bool isPatchPoint, bool hasNest, SelectionDAG &DAG, - SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, - SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, - unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { - std::vector<EVT> NodeTys; - SmallVector<SDValue, 8> Ops; - unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, isPatchPoint, hasNest, - RegsToPass, Ops, NodeTys, CS, Subtarget); +static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, bool isPatchPoint) { + // PatchPoint calls are not indirect. + if (isPatchPoint) + return false; + + if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee)) + return false; + + // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot + // because the immediate function pointer points to a descriptor instead of + // a function entry point. The ELFv2 ABI cannot use a BLA because the function + // pointer immediate points to the global entry point, while the BLA would + // need to jump to the local entry point (see rL211174). + if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() && + isBLACompatibleAddress(Callee, DAG)) + return false; + + return true; +} + +static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint, + bool isTailCall, const Function &Caller, + const SDValue &Callee, + const PPCSubtarget &Subtarget, + const TargetMachine &TM) { + if (isTailCall) + return PPCISD::TC_RETURN; + + // This is a call through a function pointer. + if (isIndirectCall) { + // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across + // indirect calls. The save of the caller's TOC pointer to the stack will be + // inserted into the DAG as part of call lowering. The restore of the TOC + // pointer is modeled by using a pseudo instruction for the call opcode that + // represents the 2 instruction sequence of an indirect branch and link, + // immediately followed by a load of the TOC pointer from the stack save + // slot into gpr2. + if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) + return PPCISD::BCTRL_LOAD_TOC; + + // An indirect call that does not need a TOC restore. + return PPCISD::BCTRL; + } + + // The ABIs that maintain a TOC pointer across calls need to have a nop + // immediately following the call instruction if the caller and callee may + // have different TOC bases. At link time if the linker determines the calls + // may not share a TOC base, the call is redirected to a trampoline inserted + // by the linker. The trampoline will (among other things) save the caller's + // TOC pointer at an ABI-designated offset in the linkage area and the linker + // will rewrite the nop to be a load of the TOC pointer from the linkage area + // into gpr2. + if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) + return callsShareTOCBase(&Caller, Callee, TM) ?
PPCISD::CALL + : PPCISD::CALL_NOP; + + return PPCISD::CALL; +} + +static bool isValidAIXExternalSymSDNode(StringRef SymName) { + return StringSwitch<bool>(SymName) + .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf", + "__moddi3", "__udivdi3", "__umoddi3", true) + .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true) + .Default(false); +} + +static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, + const SDLoc &dl, const PPCSubtarget &Subtarget) { + if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI()) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + return SDValue(Dest, 0); + + // Returns true if the callee is local, and false otherwise. + auto isLocalCallee = [&]() { + const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + const GlobalValue *GV = G ? G->getGlobal() : nullptr; + + return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) && + !dyn_cast_or_null<GlobalIFunc>(GV); + }; + + // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in + // a static relocation model causes some versions of GNU LD (2.17.50, at + // least) to force BSS-PLT, instead of secure-PLT, even if all objects are + // built with secure-PLT. + bool UsePlt = + Subtarget.is32BitELFABI() && !isLocalCallee() && + Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_; + + // On AIX, direct function calls reference the symbol for the function's + // entry point, which is named by prepending a "." before the function's + // C-linkage name. + const auto getAIXFuncEntryPointSymbolSDNode = + [&](StringRef FuncName, bool IsDeclaration, + const XCOFF::StorageClass &SC) { + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + + MCSymbolXCOFF *S = cast<MCSymbolXCOFF>( + Context.getOrCreateSymbol(Twine(".") + Twine(FuncName))); + + if (IsDeclaration && !S->hasContainingCsect()) { + // On AIX, an undefined symbol needs to be associated with a + // MCSectionXCOFF to get the correct storage mapping class. + // In this case, XCOFF::XMC_PR. + MCSectionXCOFF *Sec = Context.getXCOFFSection( + S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC, + SectionKind::getMetadata()); + S->setContainingCsect(Sec); + } + + MVT PtrVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + return DAG.getMCSymbol(S, PtrVT); + }; + + if (isFunctionGlobalAddress(Callee)) { + const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); + const GlobalValue *GV = G->getGlobal(); + + if (!Subtarget.isAIXABI()) + return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0, + UsePlt ? PPCII::MO_PLT : 0); + + assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX."); + const GlobalObject *GO = cast<GlobalObject>(GV); + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(), + SC); + } + + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *SymName = S->getSymbol(); + if (!Subtarget.isAIXABI()) + return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), + UsePlt ? PPCII::MO_PLT : 0); + + // If there exists a user-declared function whose name is the same as the + // ExternalSymbol's, then we pick up the user-declared version. 
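To make the AIX entry-point naming above concrete, a hedged illustration (the function name is hypothetical):

    // For a C function `foo` on AIX:
    //   foo   names the function descriptor (a data symbol);
    //   .foo  names the code entry point, so a direct call assembles to
    //   `bl .foo`, typically followed by a nop the linker can rewrite into
    //   a TOC restore. getAIXFuncEntryPointSymbolSDNode above builds the
    //   ".foo" MCSymbol and, for declarations, houses it in an XCOFF csect
    //   with storage mapping class XMC_PR.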
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + if (const Function *F = + dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) { + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F); + return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(), + SC); + } + + // TODO: Remove this when the support for ExternalSymbolSDNode is complete. + if (isValidAIXExternalSymSDNode(SymName)) { + return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT); + } + + report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName)); + } + + // No transformation needed. + assert(Callee.getNode() && "What no callee?"); + return Callee; +} + +static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) { + assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START && + "Expected a CALLSEQ_STARTSDNode."); + + // The last operand is the chain, except when the node has glue. If the node + // has glue, then the last operand is the glue, and the chain is the + // second-to-last operand. + SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1); + if (LastValue.getValueType() != MVT::Glue) + return LastValue; + + return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2); +} + +// Creates the node that moves a function's address into the count register +// to prepare for an indirect call instruction. +static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + const SDLoc &dl) { + SDValue MTCTROps[] = {Chain, Callee, Glue}; + EVT ReturnTypes[] = {MVT::Other, MVT::Glue}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2), + makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2)); + // The glue is the second value produced. + Glue = Chain.getValue(1); +} + +static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + SDValue CallSeqStart, + ImmutableCallSite CS, const SDLoc &dl, + bool hasNest, + const PPCSubtarget &Subtarget) { + // Function pointers in the 64-bit SVR4 ABI do not point to the function + // entry point, but to the function descriptor (the function entry point + // address is part of the function descriptor though). + // The function descriptor is a three doubleword structure with the + // following fields: function entry point, TOC base address and + // environment pointer. + // Thus for a call through a function pointer, the following actions need + // to be performed: + // 1. Save the TOC of the caller in the TOC save area of its stack + // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). + // 2. Load the address of the function entry point from the function + // descriptor. + // 3. Load the TOC of the callee from the function descriptor into r2. + // 4. Load the environment pointer from the function descriptor into + // r11. + // 5. Branch to the function entry point address. + // 6. On return of the callee, the TOC of the caller needs to be + // restored (this is done in FinishCall()). + // + // The loads are scheduled at the beginning of the call sequence, and the + // register copies are flagged together to ensure that no other + // operations can be scheduled in between. E.g. without flagging the + // copies together, a TOC access in the caller could be scheduled between + // the assignment of the callee TOC and the branch to the callee, which leads + // to incorrect code. + + // Start by loading the function address from the descriptor.
+ SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? (MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant) + : MachineMemOperand::MONone; + + MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); + + // Registers used in building the DAG. + const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); + const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); + + // Offsets of descriptor members. + const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset(); + const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset(); + + const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; + const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4; + + // One load for the function's entry point address. + SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI, + Alignment, MMOFlags); + + // One for loading the TOC anchor for the module that contains the called + // function. + SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff); + SDValue TOCPtr = + DAG.getLoad(RegVT, dl, LDChain, AddTOC, + MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags); + + // One for loading the environment pointer. + SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff); + SDValue LoadEnvPtr = + DAG.getLoad(RegVT, dl, LDChain, AddPtr, + MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags); + + // Then copy the newly loaded TOC anchor to the TOC pointer. + SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue); + Chain = TOCVal.getValue(0); + Glue = TOCVal.getValue(1); + + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. + assert((!hasNest || !Subtarget.isAIXABI()) && + "Nest parameter is not supported on AIX."); + if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue); + Chain = EnvVal.getValue(0); + Glue = EnvVal.getValue(1); + } + + // The rest of the indirect call sequence is the same as the non-descriptor + // DAG. + prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl); +} + +static void +buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv, + const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, + const PPCSubtarget &Subtarget, bool isIndirect) { + const bool IsPPC64 = Subtarget.isPPC64(); + // MVT for a general-purpose register. + const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + + // First operand is always the chain. + Ops.push_back(Chain); + + // If it's a direct call, pass the callee as the second operand. + if (!isIndirect) + Ops.push_back(Callee); + else { + assert(!isPatchPoint && "Patch point calls are not indirect."); + + // For the TOC-based ABIs, we have saved the TOC pointer to the linkage area + // on the stack (this would have been done in `LowerCall_64SVR4` or + // `LowerCall_AIX`). The call instruction is a pseudo instruction that + // represents both the indirect branch and a load that restores the TOC + // pointer from the linkage area. The operand for the TOC restore is an add + // of the TOC save offset to the stack pointer.
This must be the second + // operand: after the chain input but before any other variadic arguments. + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + + SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff); + Ops.push_back(AddTOC); + } + + // Add the register used for the environment pointer. + if (Subtarget.usesFunctionDescriptors() && !hasNest) + Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(), + RegVT)); + + + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT)); + } + + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is + // no way to mark dependencies as implicit here. + // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. + if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint) + Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls - if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (isVarArg && Subtarget.is32BitELFABI()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCFrameLowering::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CallConv == CallingConv::Fast && - getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = @@ -5236,8 +5416,40 @@ SDValue PPCTargetLowering::FinishCall( assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); - if (InFlag.getNode()) - Ops.push_back(InFlag); + // If the glue is valid, it is the last operand. 
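Taken together, buildCallOperands produces an operand list ordered roughly as follows (a summary sketch, not code from the patch; bracketed entries are conditional):

    // Ops = { Chain,
    //         Callee (direct) | TOC-restore address (indirect, TOC ABIs),
    //         [env-pointer register r11: descriptor ABIs without 'nest'],
    //         [CTR/CTR8: indirect tail calls],
    //         [SPDiff: tail calls],
    //         argument registers...,
    //         [CR1EQ: 32-bit ELF vararg],
    //         [TOC register r2/X2: TOC ABIs, non-patchpoint],
    //         register mask, [Glue] };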
+ if (Glue.getNode()) + Ops.push_back(Glue); +} + +SDValue PPCTargetLowering::FinishCall( + CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue, + SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, + unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, + SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { + + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + setUsesTOCBasePtr(DAG); + + const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint); + unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall, + DAG.getMachineFunction().getFunction(), + Callee, Subtarget, DAG.getTarget()); + + if (!isIndirect) + Callee = transformCallee(Callee, DAG, dl, Subtarget); + else if (Subtarget.usesFunctionDescriptors()) + prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS, + dl, hasNest, Subtarget); + else + prepareIndirectCall(DAG, Callee, Glue, Chain, dl); + + // Build the operand list for the call instruction. + SmallVector<SDValue, 8> Ops; + buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint, + hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff, + Subtarget, isIndirect); // Emit tail call. if (isTailCall) { @@ -5246,81 +5458,32 @@ SDValue PPCTargetLowering::FinishCall( Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa<ConstantSDNode>(Callee)) && - "Expecting an global address, external symbol, absolute value or register"); - + "Expecting a global address, external symbol, absolute value or " + "register"); + assert(CallOpc == PPCISD::TC_RETURN && + "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); + return DAG.getNode(CallOpc, dl, MVT::Other, Ops); } - // Add a NOP immediately after the branch instruction when using the 64-bit - // SVR4 or the AIX ABI. - // At link time, if caller and callee are in a different module and - // thus have a different TOC, the call will be replaced with a call to a stub - // function which saves the current TOC, loads the TOC of the callee and - // branches to the callee. The NOP will be replaced with a load instruction - // which restores the TOC of the caller from the TOC save slot of the current - // stack frame. If caller and callee belong to the same module (and have the - // same TOC), the NOP will remain unchanged, or become some other NOP. - - MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - if (!isTailCall && !isPatchPoint && - ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || - Subtarget.isAIXABI())) { - if (CallOpc == PPCISD::BCTRL) { - if (Subtarget.isAIXABI()) - report_fatal_error("Indirect call on AIX is not implemented."); - - // This is a call through a function pointer. - // Restore the caller TOC from the save area into R2. - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. 
- // We are using a target-specific load with r2 hard coded, because the - // result of a target-independent load would never go directly into r2, - // since r2 is a reserved register (which prevents the register allocator - // from allocating it), resulting in an additional register being - // allocated and an unnecessary move instruction being generated. - CallOpc = PPCISD::BCTRL_LOAD_TOC; - - SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); - unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}}; + Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops); + Glue = Chain.getValue(1); - // The address needs to go after the chain input but before the flag (or - // any other variadic arguments). - Ops.insert(std::next(Ops.begin()), AddTOC); - } else if (CallOpc == PPCISD::CALL && - !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { - // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP; - } - } - - if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) { - // On AIX, direct function calls reference the symbol for the function's - // entry point, which is named by inserting a "." before the function's - // C-linkage name. - GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); - auto &Context = DAG.getMachineFunction().getMMI().getContext(); - MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + - Twine(G->getGlobal()->getName())); - Callee = DAG.getMCSymbol(S, PtrVT); - // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode. - Ops[1] = Callee; - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCFrameLowering::eliminateCallFramePseudoInstr. + int BytesCalleePops = (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) + ? NumBytes + : 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), - InFlag, dl); - if (!Ins.empty()) - InFlag = Chain.getValue(1); + Glue, dl); + Glue = Chain.getValue(1); - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue @@ -6273,8 +6436,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. + // See prepareDescriptorIndirectCall and buildCallOperands for more + // information about calls through function pointers in the 64-bit SVR4 ABI. 
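As a rough sketch, the BCTRL_LOAD_TOC pseudo selected for these indirect calls eventually covers an instruction sequence like the following (the TOC save offset differs per ABI, and the register holding the entry address is shown only for illustration):

    // std   2, <toc-save-offset>(1)  ; caller's TOC saved during lowering
    // mtctr 12                       ; entry address into the count register
    // bctrl                          ; indirect branch and link
    // ld    2, <toc-save-offset>(1)  ; reload caller's TOC from linkage area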
if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { @@ -6695,6 +6858,205 @@ SDValue PPCTargetLowering::LowerCall_Darwin( NumBytes, Ins, InVals, CS); } +static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, + CCState &State) { + + if (ValVT == MVT::f128) + report_fatal_error("f128 is unimplemented on AIX."); + + if (ArgFlags.isByVal()) + report_fatal_error("Passing structure by value is unimplemented."); + + if (ArgFlags.isNest()) + report_fatal_error("Nest arguments are unimplemented."); + + if (ValVT.isVector() || LocVT.isVector()) + report_fatal_error("Vector arguments are unimplemented on AIX."); + + const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>( + State.getMachineFunction().getSubtarget()); + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + + static const MCPhysReg GPR_32[] = {// 32-bit registers. + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10}; + static const MCPhysReg GPR_64[] = {// 64-bit registers. + PPC::X3, PPC::X4, PPC::X5, PPC::X6, + PPC::X7, PPC::X8, PPC::X9, PPC::X10}; + + // Arguments always reserve parameter save area. + switch (ValVT.SimpleTy) { + default: + report_fatal_error("Unhandled value type for argument."); + case MVT::i64: + // i64 arguments should have been split to i32 for PPC32. + assert(IsPPC64 && "PPC32 should have split i64 values."); + LLVM_FALLTHROUGH; + case MVT::i1: + case MVT::i32: + State.AllocateStack(PtrByteSize, PtrByteSize); + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + // Promote integers if needed. + if (ValVT.getSizeInBits() < RegVT.getSizeInBits()) + LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt + : CCValAssign::LocInfo::ZExt; + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + } + else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + return false; + + case MVT::f32: + case MVT::f64: { + // Parameter save area (PSA) is reserved even if the float passes in fpr. + const unsigned StoreSize = LocVT.getStoreSize(); + // Floats are always 4-byte aligned in the PSA on AIX. + // This includes f64 in 64-bit mode for ABI compatibility. + State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4); + if (unsigned Reg = State.AllocateReg(FPR)) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + + // AIX requires that GPRs are reserved for float arguments. + // Successfully reserved GPRs are only initialized for vararg calls. + MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; + for (unsigned I = 0; I < StoreSize; I += PtrByteSize) { + if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { + if (State.isVarArg()) { + // Custom handling is required for: + // f64 in PPC32 needs to be split into 2 GPRs. + // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. 
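A worked example of the FPR allocation and GPR shadowing implemented above (a sketch; the prototype is hypothetical):

    // void f(double a, int b);
    //   PPC32: a -> F1, shadowing R3 and R4 (8 bytes of PSA); b -> R5.
    //   PPC64: a -> F1, shadowing X3 (one doubleword of PSA); b -> X4.
    // For a fixed argument the shadowed GPRs stay uninitialized; for a
    // vararg call they are also initialized with the float's bits through
    // the CustomReg locations added below.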
+ State.addLoc( + CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); + } + } else if (State.isVarArg()) { + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + } + } + + return false; + } + } + return true; +} + +static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, + bool IsPPC64) { + assert((IsPPC64 || SVT != MVT::i64) && + "i64 should have been split for 32-bit codegen."); + + switch (SVT) { + default: + report_fatal_error("Unexpected value type for formal argument"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f32: + return &PPC::F4RCRegClass; + case MVT::f64: + return &PPC::F8RCRegClass; + } +} + +static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, + SelectionDAG &DAG, SDValue ArgValue, + MVT LocVT, const SDLoc &dl) { + assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); + assert(ValVT.getSizeInBits() < LocVT.getSizeInBits()); + + if (Flags.isSExt()) + ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + else if (Flags.isZExt()) + ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); +} + +SDValue PPCTargetLowering::LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && + "Unexpected calling convention!"); + + if (isVarArg) + report_fatal_error("This call type is unimplemented on AIX."); + + if (getTargetMachine().Options.GuaranteedTailCallOpt) + report_fatal_error("Tail call support is unimplemented on AIX."); + + if (useSoftFloat()) + report_fatal_error("Soft float support is unimplemented on AIX."); + + const PPCSubtarget &Subtarget = + static_cast<const PPCSubtarget &>(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX is not supported on AIX."); + + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + MachineFunction &MF = DAG.getMachineFunction(); + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); + + // Reserve space for the linkage area on the stack. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // On AIX a minimum of 8 words is saved to the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize; + CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize); + CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (VA.isRegLoc()) { + EVT ValVT = VA.getValVT(); + MVT LocVT = VA.getLocVT(); + MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); + ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); + if (ValVT.isScalarInteger() && + (ValVT.getSizeInBits() < LocVT.getSizeInBits())) { + ArgValue = + truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); + } + InVals.push_back(ArgValue); + } else { + report_fatal_error("Handling of formal arguments on the stack is " + "unimplemented!"); + } + } + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCInfo.getNextStackOffset(); + + // Set the size that is at least reserved in the caller of this function. + // Tail call optimized function's reserved stack space needs to be aligned so + // that taking the difference between two stack areas will result in an + // aligned stack. + MinReservedArea = + EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setMinReservedArea(MinReservedArea); + + return Chain; +} SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -6705,22 +7067,33 @@ SDValue PPCTargetLowering::LowerCall_AIX( SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const { - assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && - "Unimplemented calling convention!"); - if (isVarArg || isPatchPoint) + assert((CallConv == CallingConv::C || + CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && "Unexpected calling convention!"); + + if (isPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - bool isPPC64 = PtrVT == MVT::i64; - unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned NumOps = Outs.size(); + const PPCSubtarget& Subtarget = + static_cast<const PPCSubtarget&>(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX is not supported on AIX."); + if (Subtarget.hasAltivec()) + report_fatal_error("Altivec support is unimplemented on AIX."); + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); - // Count how many bytes are to be pushed on the stack, including the linkage - // area, parameter list area. - // On XCOFF, we start with 24/48, which is reserved space for - // [SP][CR][LR][2 x reserved][TOC]. - unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // Reserve space for the linkage save area (LSA) on the stack. + // In both PPC32 and PPC64 there are 6 reserved slots in the LSA: + // [SP][CR][LR][2 x reserved][TOC]. + // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ?
8 : 4; + CCInfo.AllocateStack(LinkageSize, PtrByteSize); + CCInfo.AnalyzeCallOperands(Outs, CC_AIX); // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if the callee @@ -6728,98 +7101,101 @@ SDValue PPCTargetLowering::LowerCall_AIX( // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - unsigned NumBytes = LinkageSize + 8 * PtrByteSize; + const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize; + const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize; // Adjust the stack pointer for the new arguments... - // These operations are automatically eliminated by the prolog/epilog - // inserter pass. + // These operations are automatically eliminated by the prolog/epilog pass. Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; - static const MCPhysReg GPR_32[] = { // 32-bit registers. - PPC::R3, PPC::R4, PPC::R5, PPC::R6, - PPC::R7, PPC::R8, PPC::R9, PPC::R10 - }; - static const MCPhysReg GPR_64[] = { // 64-bit registers. - PPC::X3, PPC::X4, PPC::X5, PPC::X6, - PPC::X7, PPC::X8, PPC::X9, PPC::X10 - }; - - const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64) - : array_lengthof(GPR_32); - const unsigned NumFPRs = array_lengthof(FPR); - assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing " - "on AIX"); + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; - const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; - unsigned GPR_idx = 0, FPR_idx = 0; + for (unsigned I = 0, E = ArgLocs.size(); I != E;) { + CCValAssign &VA = ArgLocs[I++]; - SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + if (VA.isMemLoc()) + report_fatal_error("Handling of placing parameters on the stack is " + "unimplemented!"); + if (!VA.isRegLoc()) + report_fatal_error( + "Unexpected non-register location for function call argument."); - if (isTailCall) - report_fatal_error("Handling of tail call is unimplemented!"); - int SPDiff = 0; + SDValue Arg = OutVals[VA.getValNo()]; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (!VA.needsCustom()) { + switch (VA.getLocInfo()) { + default: + report_fatal_error("Unexpected argument extension type."); + case CCValAssign::Full: + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - // Promote integers if needed. - if (Arg.getValueType() == MVT::i1 || - (isPPC64 && Arg.getValueType() == MVT::i32)) { - unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg); + continue; } - // Note: "by value" is code for passing a structure by value, not - // basic types. - if (Flags.isByVal()) - report_fatal_error("Passing structure by value is unimplemented!"); + // Custom handling is used for GPR initializations for vararg float + // arguments. 
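Concretely, the custom handling below splits a vararg f64 on PPC32 across a GPR pair after bitcasting it to an integer (a hedged walk-through; the register assignments assume the double occupies the second and third parameter words):

    // printf("%f", x);             // on 32-bit AIX, format string in R3
    //   ArgAsInt = bitcast f64 x to i64
    //   R4 = trunc(ArgAsInt >> 32)   // most significant word
    //   R5 = trunc(ArgAsInt)         // least significant word
    // On PPC64 an f32 is instead zero-extended into the low 32 bits of a
    // single 64-bit GPR.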
+ assert(isVarArg && VA.getValVT().isFloatingPoint() && + VA.getLocVT().isInteger() && + "Unexpected custom register handling for calling convention."); - switch (Arg.getSimpleValueType().SimpleTy) { - default: llvm_unreachable("Unexpected ValueType for argument!"); - case MVT::i1: - case MVT::i32: - case MVT::i64: - if (GPR_idx != NumGPRs) - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); - else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - break; - case MVT::f32: - case MVT::f64: - if (FPR_idx != NumFPRs) { - RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); + SDValue ArgAsInt = + DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg); - // If we have any FPRs remaining, we may also have GPRs remaining. - // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available - // GPRs. - if (GPR_idx != NumGPRs) - ++GPR_idx; - if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64) - ++GPR_idx; - } else - report_fatal_error("Handling of placing parameters on the stack is " - "unimplemented!"); - break; - case MVT::v4f32: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v16i8: - case MVT::v2f64: - case MVT::v2i64: - case MVT::v1i128: - case MVT::f128: - case MVT::v4f64: - case MVT::v4i1: - report_fatal_error("Handling of this parameter type is unimplemented!"); - } - } + if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize()) + // f32 in 32-bit GPR + // f64 in 64-bit GPR + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); + else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits()) + // f32 in 64-bit GPR. + RegsToPass.push_back(std::make_pair( + VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT()))); + else { + // f64 in two 32-bit GPRs + // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. + assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 && + "Unexpected custom register for argument!"); + CCValAssign &GPR1 = VA; + SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt, + DAG.getConstant(32, dl, MVT::i8)); + RegsToPass.push_back(std::make_pair( + GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); + assert(I != E && "A second custom GPR is expected!"); + CCValAssign &GPR2 = ArgLocs[I++]; + assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() && + GPR2.needsCustom() && "A second custom GPR is expected!"); + RegsToPass.push_back(std::make_pair( + GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); + } + } + + // For indirect calls, we need to save the TOC base to the stack for + // restoration after the call. + if (!isTailCall && !isPatchPoint && + !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) { + const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); + const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); + const MVT PtrVT = Subtarget.isPPC64() ? 
+  // For indirect calls, we need to save the TOC base to the stack for
+  // restoration after the call.
+  if (!isTailCall && !isPatchPoint &&
+      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
+    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+    const unsigned TOCSaveOffset =
+        Subtarget.getFrameLowering()->getTOCSaveOffset();
 
-  if (!isFunctionGlobalAddress(Callee) &&
-      !isa<ExternalSymbolSDNode>(Callee))
-    report_fatal_error("Handling of indirect call is unimplemented!");
+    setUsesTOCBasePtr(DAG);
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
+    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
+    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+    Chain = DAG.getStore(
+        Val.getValue(1), dl, Val, AddPtr,
+        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+  }
 
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
@@ -6829,10 +7205,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
     InFlag = Chain.getValue(1);
   }
 
+  const int SPDiff = 0;
   return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
-                    /* unused except on PPC64 ELFv1 */ false, DAG,
-                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
-                    NumBytes, Ins, InVals, CS);
+                    /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
+                    InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
+                    InVals, CS);
 }
 
 bool
@@ -7121,8 +7498,7 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
          "Custom lowering only for i1 results");
 
   SDLoc DL(Op);
-  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
-                     Op.getOperand(0));
+  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
 }
 
 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
@@ -7188,17 +7564,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
       !Op.getOperand(2).getValueType().isFloatingPoint())
     return Op;
 
+  bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+  bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
   // We might be able to do better than this under some circumstances, but in
   // general, fsel-based lowering of select is a finite-math-only optimization.
   // For more information, see section F.3 of the 2.06 ISA specification.
-  if (!DAG.getTarget().Options.NoInfsFPMath ||
-      !DAG.getTarget().Options.NoNaNsFPMath)
+  // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+  // presence of infinities.
+  if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
     return Op;
-  // TODO: Propagate flags from the select rather than global settings.
-  SDNodeFlags Flags;
-  Flags.setNoInfs(true);
-  Flags.setNoNaNs(true);
-
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 
   EVT ResVT = Op.getValueType();
@@ -7207,6 +7581,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
   SDLoc dl(Op);
 
+  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+    switch (CC) {
+    default:
+      // Not a min/max but with finite math, we may still be able to use fsel.
+      if (HasNoInfs && HasNoNaNs)
+        break;
+      return Op;
+    case ISD::SETOGT:
+    case ISD::SETGT:
+      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+    case ISD::SETOLT:
+    case ISD::SETLT:
+      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+    }
+  }
+
+  // TODO: Propagate flags from the select rather than global settings.
+  SDNodeFlags Flags;
+  Flags.setNoInfs(true);
+  Flags.setNoNaNs(true);
+
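[editor's note] Why no finite-math flags are needed on the new path: the xsmaxcdp/xsmincdp "type-C" instructions implement exactly the C ternary that a select_cc(a, b, a, b, setgt) pattern expresses, so infinities are handled correctly by construction. A rough scalar model of that semantics, illustrative only and not the ISA pseudocode:

#include <cassert>
#include <cmath>

// "Type-C" min/max behave like the plain C ternary, which is precisely the
// select_cc pattern recognized above (LHS == TV, RHS == FV).
static double xsmaxcdp(double A, double B) { return A > B ? A : B; }
static double xsmincdp(double A, double B) { return A < B ? A : B; }

int main() {
  const double Inf = INFINITY;
  assert(xsmaxcdp(1.0, Inf) == Inf);   // Correct even with infinities...
  assert(xsmincdp(-Inf, 0.0) == -Inf); // ...so no fast-math gate is needed.
  return 0;
}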
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
   SDValue Sel1;
@@ -8055,8 +8450,6 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
 /// SplatSize. Cast the result to VT.
 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                            SelectionDAG &DAG, const SDLoc &dl) {
-  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
-
   static const MVT VTys[] = { // canonical VT to use for each size.
     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
   };
@@ -8376,29 +8769,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   }
 
   // We have XXSPLTIB for constant splats one byte wide
-  if (Subtarget.hasP9Vector() && SplatSize == 1) {
-    // This is a splat of 1-byte elements with some elements potentially undef.
-    // Rather than trying to match undef in the SDAG patterns, ensure that all
-    // elements are the same constant.
-    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
-      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
-                                                       dl, MVT::i32));
-      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
-      if (Op.getValueType() != MVT::v16i8)
-        return DAG.getBitcast(Op.getValueType(), NewBV);
-      return NewBV;
-    }
-
-    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
-    // detect that constant splats like v8i16: 0xABAB are really just splats
-    // of a 1-byte constant. In this case, we need to convert the node to a
-    // splat of v16i8 and a bitcast.
-    if (Op.getValueType() != MVT::v16i8)
-      return DAG.getBitcast(Op.getValueType(),
-                            DAG.getConstant(SplatBits, dl, MVT::v16i8));
-
-    return Op;
-  }
+  // FIXME: SplatBits is an unsigned int being cast to an int while passing it
+  // as an argument to BuildSplatI. Given SplatSize == 1 it is okay here.
+  if (Subtarget.hasP9Vector() && SplatSize == 1)
+    return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
 
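[editor's note] The deleted block special-cased wider splats such as v8i16 0xABAB, which isConstantSplat already recognizes as byte splats; the removal of the range assert in BuildSplatI is what lets it absorb that duty. This stand-alone check (not LLVM code) shows on a 64-bit slice why the byte-splat and halfword-splat encodings are the same bit pattern:

#include <cassert>
#include <cstdint>

// Why a v8i16 splat of 0xABAB is really a byte splat: replicating the byte
// 0xAB produces the same bits as replicating the halfword 0xABAB, so a
// single XXSPLTIB covers both.
int main() {
  uint64_t ByteSplat = 0, HalfSplat = 0;
  for (int i = 0; i < 8; ++i)
    ByteSplat = (ByteSplat << 8) | 0xAB;
  for (int i = 0; i < 4; ++i)
    HalfSplat = (HalfSplat << 16) | 0xABAB;
  assert(ByteSplat == HalfSplat); // Identical 64-bit patterns.
  return 0;
}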
   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
@@ -8930,19 +9304,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
 
   if (Subtarget.hasP9Vector()) {
     if (PPC::isXXBRHShuffleMask(SVOp)) {
       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
-      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+      SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
-      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+      SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
-      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+      SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
-      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+      SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
     }
   }
@@ -9503,7 +9877,7 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                    Op.getOperand(0), Op.getOperand(0));
   // XXBRD
-  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
   // MFVSRD
   int VectorIndex = 0;
   if (Subtarget.isLittleEndian())
@@ -10845,9 +11219,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     DebugLoc dl = MI.getDebugLoc();
     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
-  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
-             MI.getOpcode() == PPC::SELECT_CC_I8 ||
-             MI.getOpcode() == PPC::SELECT_CC_F4 ||
+  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
              MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_F16 ||
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
@@ -10859,8 +11231,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
             MI.getOpcode() == PPC::SELECT_CC_SPE ||
-             MI.getOpcode() == PPC::SELECT_I4 ||
-             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_F16 ||
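[editor's note] Replacing PPCISD::XXREVERSE with the generic ISD::BSWAP works because each isXXBR*ShuffleMask is exactly a per-element byte swap at the corresponding width. For the halfword case, the v16i8 mask {1,0, 3,2, ..., 15,14} and a per-lane bswap16 agree, as this stand-alone check (lane bytes packed little-endian for the model) illustrates:

#include <cassert>
#include <cstdint>

// Equivalence behind the XXBRH change: shuffle-by-mask and per-halfword
// byte swap produce the same 16 bytes.
int main() {
  uint8_t V[16], Shuffled[16];
  for (int i = 0; i < 16; ++i)
    V[i] = static_cast<uint8_t>(i * 17); // Arbitrary distinct bytes.

  // The shuffle-mask view: result byte i takes source byte (i ^ 1).
  for (int i = 0; i < 16; ++i)
    Shuffled[i] = V[i ^ 1];

  // The v8i16 BSWAP view: reverse the bytes of each 16-bit lane.
  for (int i = 0; i < 16; i += 2) {
    uint16_t Lane = static_cast<uint16_t>(V[i] | (V[i + 1] << 8));
    Lane = static_cast<uint16_t>((Lane >> 8) | (Lane << 8)); // bswap16
    assert(Shuffled[i] == (Lane & 0xFF) && Shuffled[i + 1] == (Lane >> 8));
  }
  return 0;
}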
@@ -11397,28 +11767,28 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
 
     // Restore FPSCR value.
     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
-  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
-             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
-             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
-             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
-    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
-                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
-                          ? PPC::ANDIo8
-                          : PPC::ANDIo;
-    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
-                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
+             MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+             MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
+    unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+                       MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
+                          ? PPC::ANDI8_rec
+                          : PPC::ANDI_rec;
+    bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+                 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
 
     MachineRegisterInfo &RegInfo = F->getRegInfo();
     Register Dest = RegInfo.createVirtualRegister(
-        Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
+        Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
 
-    DebugLoc dl = MI.getDebugLoc();
-    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+    DebugLoc Dl = MI.getDebugLoc();
+    BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
-    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
-        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+        .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
     DebugLoc Dl = MI.getDebugLoc();
     MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -11638,7 +12008,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   // reciprocal if there are two or more FDIVs (for embedded cores with only
   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
-  switch (Subtarget.getDarwinDirective()) {
+  switch (Subtarget.getCPUDirective()) {
   default:
     return 3;
   case PPC::DIR_440:
@@ -14111,7 +14481,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
-  switch (Subtarget.getDarwinDirective()) {
+  switch (Subtarget.getCPUDirective()) {
   default: break;
   case PPC::DIR_970:
   case PPC::DIR_PWR4:
@@ -14121,7 +14491,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   case PPC::DIR_PWR6X:
   case PPC::DIR_PWR7:
   case PPC::DIR_PWR8:
-  case PPC::DIR_PWR9: {
+  case PPC::DIR_PWR9:
+  case PPC::DIR_PWR_FUTURE: {
     if (!ML)
       break;
@@ -14309,6 +14680,17 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       return std::make_pair(0U, &PPC::VSFRCRegClass);
   }
 
+  // If we name a VSX register, we can't defer to the base class because it
+  // will not recognize the correct register (their names will be VSL{0-31}
+  // and V{0-31} so they won't match). So we match them here.
+  if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
+    int VSNum = atoi(Constraint.data() + 3);
+    assert(VSNum >= 0 && VSNum <= 63 &&
+           "Attempted to access a vsr out of range");
+    if (VSNum < 32)
+      return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
+    return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+  }
 
   std::pair<unsigned, const TargetRegisterClass *> R =
       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
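[editor's note] The constraint string reaching getRegForInlineAsmConstraint includes the braces, e.g. "{vs35}", which is why the digit parse starts at offset 3 and why vs32-vs63 fold onto the Altivec V registers. A stand-alone model of that mapping; matchVSXConstraint and the returned labels are hypothetical, where the real code returns PPC:: register enum values:

#include <cassert>
#include <cstdlib>
#include <string>
#include <utility>

// Model of the VSX constraint matching above: numbers 0-31 name the VSL
// side of the VSX file, 32-63 overlap the Altivec V0-V31 registers.
static std::pair<std::string, int> matchVSXConstraint(const std::string &C) {
  assert(C.size() > 3 && C[1] == 'v' && C[2] == 's');
  int VSNum = std::atoi(C.data() + 3); // atoi stops at the closing brace.
  assert(VSNum >= 0 && VSNum <= 63 && "vsr out of range");
  if (VSNum < 32)
    return {"VSL", VSNum};    // Maps to PPC::VSL0 + VSNum.
  return {"V", VSNum - 32};   // Maps to PPC::V0 + (VSNum - 32).
}

int main() {
  assert(matchVSXConstraint("{vs0}") == std::make_pair(std::string("VSL"), 0));
  assert(matchVSXConstraint("{vs35}") == std::make_pair(std::string("V"), 3));
  return 0;
}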
@@ -14513,16 +14895,15 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
 
 // FIXME? Maybe this could be a TableGen attribute on some registers and
 // this table could be generated automatically from RegInfo.
-Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
                                               const MachineFunction &MF) const {
   bool isPPC64 = Subtarget.isPPC64();
   bool IsDarwinABI = Subtarget.isDarwinABI();
 
-  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
-      (!isPPC64 && VT != MVT::i32))
+  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
+  if (!is64Bit && VT != LLT::scalar(32))
     report_fatal_error("Invalid register global variable type");
 
-  bool is64Bit = isPPC64 && VT == MVT::i64;
   Register Reg = StringSwitch<Register>(RegName)
                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                    .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
@@ -14870,6 +15251,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   if (!VT.isSimple())
     return false;
 
+  if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+    return false;
+
   if (VT.getSimpleVT().isVector()) {
     if (Subtarget.hasVSX()) {
       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
@@ -14889,7 +15273,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return true;
 }
 
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
@@ -15278,7 +15663,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
     return SDValue();
 
   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
-    switch (this->Subtarget.getDarwinDirective()) {
+    switch (this->Subtarget.getCPUDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
@@ -15288,6 +15673,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
      //  vector        7       2      2
      return true;
    case PPC::DIR_PWR9:
+   case PPC::DIR_PWR_FUTURE:
      //  type        mul     add    shl
      //  scalar        5       2      2
      //  vector        7       2      2
@@ -15357,12 +15743,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   if (!CI->isTailCall())
     return false;
 
-  // If tail calls are disabled for the caller then we are done.
-  const Function *Caller = CI->getParent()->getParent();
-  auto Attr = Caller->getFnAttribute("disable-tail-calls");
-  if (Attr.getValueAsString() == "true")
-    return false;
-
   // If sibling calls have been disabled and tail-calls aren't guaranteed
   // there is no reason to duplicate.
   auto &TM = getTargetMachine();
@@ -15375,6 +15755,7 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
     return false;
 
   // Make sure the callee and caller calling conventions are eligible for tco.
+  const Function *Caller = CI->getParent()->getParent();
   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                            CI->getCallingConv()))
     return false;
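[editor's note] For reference, the getRegisterByName hunk above tightens the type check as part of the EVT-to-LLT migration: s64 register names are valid only on PPC64, s32 is accepted everywhere, and anything else is a fatal error. A compact model of just that predicate; use64BitRegNames is a hypothetical name and report_fatal_error is modeled with an exception:

#include <cassert>
#include <stdexcept>

// Returns whether the 64-bit (Xn) register names apply; rejects any scalar
// width other than 32, or 64 on PPC64, mirroring the check above.
static bool use64BitRegNames(bool IsPPC64, unsigned ScalarBits) {
  const bool Is64Bit = IsPPC64 && ScalarBits == 64;
  if (!Is64Bit && ScalarBits != 32)
    throw std::invalid_argument("Invalid register global variable type");
  return Is64Bit;
}

int main() {
  assert(use64BitRegNames(true, 64));   // r1 -> X1 on PPC64.
  assert(!use64BitRegNames(true, 32));  // s32 still accepted, maps to R1.
  assert(!use64BitRegNames(false, 32)); // 32-bit target.
  return 0;
}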