Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1357
1 file changed, 869 insertions(+), 488 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8cf6a660b08b..60ed72e1018b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -52,6 +52,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
@@ -66,6 +67,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -119,6 +121,9 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);
+static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
+cl::desc("use absolute jump tables on ppc"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
@@ -132,10 +137,6 @@ extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- // Use _setjmp/_longjmp instead of setjmp/longjmp.
- setUseUnderscoreSetJmp(true);
- setUseUnderscoreLongJmp(true);
-
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
bool isPPC64 = Subtarget.isPPC64();
@@ -389,6 +390,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Legal);
setOperationAction(ISD::BITCAST, MVT::i64, Legal);
setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::LRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f64, Legal);
+ setOperationAction(ISD::LLRINT, MVT::f32, Legal);
+ setOperationAction(ISD::LROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LROUND, MVT::f32, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f64, Legal);
+ setOperationAction(ISD::LLROUND, MVT::f32, Legal);
+ }
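
The operations marked Legal in this block are the libm float-to-integer rounding conversions; gating them on UnsafeFPMath lets ISel pick direct conversion instructions instead of libcalls when strict FP semantics are waived. A reminder of the corresponding source-level calls (standard C++, illustrative only):

    #include <cmath>

    // The ISD nodes made Legal above correspond to these libm conversions;
    // under unsafe FP math they can be selected as single instructions
    // rather than as calls into libm.
    long toLong(double D) { return std::lrint(D); }             // ISD::LRINT
    long long toLongLong(float F) { return std::llround(F); }   // ISD::LLROUND
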
} else {
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -548,6 +559,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+ }
+
if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
@@ -702,6 +720,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (!Subtarget.hasP8Altivec())
setOperationAction(ISD::ABS, MVT::v2i64, Expand);
+ // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
+ if (Subtarget.hasAltivec())
+ for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
+ setOperationAction(ISD::ROTL, VT, Legal);
+ // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
+
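
For reference, ISD::ROTL nodes typically originate from the standard rotate idiom; a sketch of scalar source whose vectorized form maps onto vrlw (or vrld with P8 Altivec), illustrative and not part of this patch:

    #include <cstdint>

    // The masked rotate-left idiom; LLVM folds this to a single rotate, and
    // once vectorized to v4i32 (or v2i64 on P8) the settings above let it
    // select vrlw (or vrld) directly.
    uint32_t rotl32(uint32_t X, uint32_t N) {
      N &= 31; // Masking keeps the shift amounts defined for N == 0.
      return (X << N) | (X >> ((32 - N) & 31));
    }
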
addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
@@ -756,13 +782,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ // The nearbyint variants are not allowed to raise the inexact exception
+ // so we can only code-gen them with unsafe math.
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ }
+
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
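
On the nearbyint comment above: C requires that nearbyint, unlike rint, never raise FE_INEXACT, and the hardware rounding instructions cannot promise that, hence the unsafe-math gate. The required semantics in standard C++ (a sketch; assumes the platform reports FE_INEXACT):

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    void nearbyintSemantics() {
      std::feclearexcept(FE_INEXACT);
      double R = std::nearbyint(1.5); // Rounds 1.5, must not raise FE_INEXACT.
      assert(!std::fetestexcept(FE_INEXACT));
      (void)R;
    }
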
@@ -910,12 +946,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FREM, MVT::f128, Expand);
}
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
-
+ setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
}
if (Subtarget.hasP9Altivec()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
}
@@ -1183,7 +1230,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.isDarwin())
setPrefFunctionAlignment(Align(16));
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
@@ -1198,6 +1245,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
break;
@@ -1212,15 +1260,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
- if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
+ Subtarget.getCPUDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
- } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
// The A2 also benefits from (very) aggressive inlining of memcpy and
// friends. The overhead of a function call, even when warm, can be
// over one hundred cycles.
@@ -1294,6 +1342,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
+ case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
@@ -1314,7 +1364,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::VPERM: return "PPCISD::VPERM";
case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
- case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::VECSHL: return "PPCISD::VECSHL";
case PPCISD::CMPB: return "PPCISD::CMPB";
@@ -1345,8 +1394,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
- case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
- case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
+ case PPCISD::ANDI_rec_1_EQ_BIT:
+ return "PPCISD::ANDI_rec_1_EQ_BIT";
+ case PPCISD::ANDI_rec_1_GT_BIT:
+ return "PPCISD::ANDI_rec_1_GT_BIT";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -2699,9 +2750,9 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
const Constant *C = CP->getConstVal();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return getTOCEntry(DAG, SDLoc(CP), GA);
@@ -2735,14 +2786,16 @@ unsigned PPCTargetLowering::getJumpTableEncoding() const {
}
bool PPCTargetLowering::isJumpTableRelative() const {
- if (Subtarget.isPPC64())
+ if (UseAbsoluteJumpTables)
+ return false;
+ if (Subtarget.isPPC64() || Subtarget.isAIXABI())
return true;
return TargetLowering::isJumpTableRelative();
}
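
For context on the new flag: a dense switch like the sketch below is what lowers through a jump table. With relative jump tables each entry is a label difference from a table base (position-independent); with -ppc-use-absolute-jumptables each entry is a full code address. A hypothetical example:

    // Dense switches are typically lowered through a jump table.
    int dispatch(int Op, int X) {
      switch (Op) {
      case 0: return X + 1;
      case 1: return X - 1;
      case 2: return X * 2;
      case 3: return X / 2;
      case 4: return -X;
      case 5: return X ^ 1;
      default: return X;
      }
    }
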
SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
switch (getTargetMachine().getCodeModel()) {
@@ -2759,7 +2812,7 @@ const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,
MCContext &Ctx) const {
- if (!Subtarget.isPPC64())
+ if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
switch (getTargetMachine().getCodeModel()) {
@@ -2775,9 +2828,9 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return getTOCEntry(DAG, SDLoc(JT), GA);
@@ -2804,9 +2857,9 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
const BlockAddress *BA = BASDN->getBlockAddress();
- // 64-bit SVR4 ABI code is always position-independent.
+ // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.is64BitELFABI()) {
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
return getTOCEntry(DAG, SDLoc(BASDN), GA);
@@ -3129,11 +3182,17 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
+
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
+ if (Subtarget.isAIXABI())
+ report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
+
SDValue Chain = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
@@ -3394,15 +3453,16 @@ SDValue PPCTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ if (Subtarget.isAIXABI())
+ return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
+ InVals);
if (Subtarget.is64BitELFABI())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- else if (Subtarget.is32BitELFABI())
+ if (Subtarget.is32BitELFABI())
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
- // FIXME: We are using this for both AIX and Darwin. We should add appropriate
- // AIX testing, and rename it appropriately.
return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
InVals);
}
@@ -4934,213 +4994,6 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
return false;
}
-static unsigned
-PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
- SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
- bool isPatchPoint, bool hasNest,
- SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
- SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
- bool isPPC64 = Subtarget.isPPC64();
- bool isSVR4ABI = Subtarget.isSVR4ABI();
- bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
- bool isAIXABI = Subtarget.isAIXABI();
-
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
-
- unsigned CallOpc = PPCISD::CALL;
-
- bool needIndirectCall = true;
- if (!isSVR4ABI || !isPPC64)
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
-
- // PC-relative references to external symbols should go through $stub, unless
- // we're building with the leopard linker or later, which automatically
- // synthesizes these stubs.
- const TargetMachine &TM = DAG.getTarget();
- const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
- const GlobalValue *GV = nullptr;
- if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
- GV = G->getGlobal();
- bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
- bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
- // every direct call is) turn it into a TargetGlobalAddress /
- // TargetExternalSymbol node so that legalize doesn't hack it.
- if (isFunctionGlobalAddress(Callee)) {
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
-
- // A call to a TLS address is actually an indirect call to a
- // thread-specific pointer.
- unsigned OpFlags = 0;
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType(), 0, OpFlags);
- needIndirectCall = false;
- }
-
- if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- unsigned char OpFlags = 0;
-
- if (UsePlt)
- OpFlags = PPCII::MO_PLT;
-
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
- OpFlags);
- needIndirectCall = false;
- }
-
- if (isPatchPoint) {
- // We'll form an invalid direct call when lowering a patchpoint; the full
- // sequence for an indirect call is complicated, and many of the
- // instructions introduced might have side effects (and, thus, can't be
- // removed later). The call itself will be removed as soon as the
- // argument/return lowering is complete, so the fact that it has the wrong
- // kind of operands should not really matter.
- needIndirectCall = false;
- }
-
- if (needIndirectCall) {
- // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
- // to do the call, we can't use PPCISD::CALL.
- SDValue MTCTROps[] = {Chain, Callee, InFlag};
-
- if (is64BitELFv1ABI) {
- // Function pointers in the 64-bit SVR4 ABI do not point to the function
- // entry point, but to the function descriptor (the function entry point
- // address is part of the function descriptor though).
- // The function descriptor is a three doubleword structure with the
- // following fields: function entry point, TOC base address and
- // environment pointer.
- // Thus for a call through a function pointer, the following actions need
- // to be performed:
- // 1. Save the TOC of the caller in the TOC save area of its stack
- // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
- // 2. Load the address of the function entry point from the function
- // descriptor.
- // 3. Load the TOC of the callee from the function descriptor into r2.
- // 4. Load the environment pointer from the function descriptor into
- // r11.
- // 5. Branch to the function entry point address.
- // 6. On return of the callee, the TOC of the caller needs to be
- // restored (this is done in FinishCall()).
- //
- // The loads are scheduled at the beginning of the call sequence, and the
- // register copies are flagged together to ensure that no other
- // operations can be scheduled in between. E.g. without flagging the
- // copies together, a TOC access in the caller could be scheduled between
- // the assignment of the callee TOC and the branch to the callee, which
- // results in the TOC access going through the TOC of the callee instead
- // of going through the TOC of the caller, which leads to incorrect code.
-
- // Load the address of the function entry point from the function
- // descriptor.
- SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
- if (LDChain.getValueType() == MVT::Glue)
- LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
-
- auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
- ? (MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant)
- : MachineMemOperand::MONone;
-
- MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
- SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
- /* Alignment = */ 8, MMOFlags);
-
- // Load environment pointer into r11.
- SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
- SDValue LoadEnvPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
- /* Alignment = */ 8, MMOFlags);
-
- SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
- SDValue TOCPtr =
- DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
- /* Alignment = */ 8, MMOFlags);
-
- setUsesTOCBasePtr(DAG);
- SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
- InFlag);
- Chain = TOCVal.getValue(0);
- InFlag = TOCVal.getValue(1);
-
- // If the function call has an explicit 'nest' parameter, it takes the
- // place of the environment pointer.
- if (!hasNest) {
- SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
- InFlag);
-
- Chain = EnvVal.getValue(0);
- InFlag = EnvVal.getValue(1);
- }
-
- MTCTROps[0] = Chain;
- MTCTROps[1] = LoadFuncPtr;
- MTCTROps[2] = InFlag;
- }
-
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
- makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
- InFlag = Chain.getValue(1);
-
- NodeTys.clear();
- NodeTys.push_back(MVT::Other);
- NodeTys.push_back(MVT::Glue);
- Ops.push_back(Chain);
- CallOpc = PPCISD::BCTRL;
- Callee.setNode(nullptr);
- // Add use of X11 (holding environment pointer)
- if (is64BitELFv1ABI && !hasNest)
- Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
- // Add CTR register as callee so a bctr can be emitted later.
- if (isTailCall)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
- }
-
- // If this is a direct call, pass the chain and the callee.
- if (Callee.getNode()) {
- Ops.push_back(Chain);
- Ops.push_back(Callee);
- }
- // If this is a tail call add stack pointer delta.
- if (isTailCall)
- Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
- // live into the call.
- // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
- if ((isSVR4ABI && isPPC64) || isAIXABI) {
- setUsesTOCBasePtr(DAG);
-
- // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
- // no way to mark dependencies as implicit here.
- // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
- if (!isPatchPoint)
- Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
- : PPC::R2, PtrVT));
- }
-
- return CallOpc;
-}
-
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -5205,30 +5058,357 @@ SDValue PPCTargetLowering::LowerCallResult(
return Chain;
}
-SDValue PPCTargetLowering::FinishCall(
- CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
- bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
- SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
- unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
- std::vector<EVT> NodeTys;
- SmallVector<SDValue, 8> Ops;
- unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
- SPDiff, isTailCall, isPatchPoint, hasNest,
- RegsToPass, Ops, NodeTys, CS, Subtarget);
+static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget, bool isPatchPoint) {
+ // PatchPoint calls are not indirect.
+ if (isPatchPoint)
+ return false;
+
+ if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
+ // because the immediate function pointer points to a descriptor instead of
+ // a function entry point. The ELFv2 ABI cannot use a BLA because the function
+ // pointer immediate points to the global entry point, while the BLA would
+ // need to jump to the local entry point (see rL211174).
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
+ isBLACompatibleAddress(Callee, DAG))
+ return false;
+
+ return true;
+}
+
+static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint,
+ bool isTailCall, const Function &Caller,
+ const SDValue &Callee,
+ const PPCSubtarget &Subtarget,
+ const TargetMachine &TM) {
+ if (isTailCall)
+ return PPCISD::TC_RETURN;
+
+ // This is a call through a function pointer.
+ if (isIndirectCall) {
+ // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
+ // indirect calls. The save of the caller's TOC pointer to the stack will be
+ // inserted into the DAG as part of call lowering. The restore of the TOC
+ // pointer is modeled by using a pseudo instruction for the call opcode that
+ // represents the two-instruction sequence of an indirect branch and link,
+ // immediately followed by a load of the TOC pointer from the stack save
+ // slot into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return PPCISD::BCTRL_LOAD_TOC;
+
+ // An indirect call that does not need a TOC restore.
+ return PPCISD::BCTRL;
+ }
+
+ // The ABIs that maintain a TOC pointer across calls need to have a nop
+ // immediately following the call instruction if the caller and callee may
+ // have different TOC bases. At link time, if the linker determines the calls
+ // may not share a TOC base, the call is redirected to a trampoline inserted
+ // by the linker. The trampoline will (among other things) save the caller's
+ // TOC pointer at an ABI-designated offset in the linkage area, and the linker
+ // will rewrite the nop to be a load of the TOC pointer from the linkage area
+ // into gpr2.
+ if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+ : PPCISD::CALL_NOP;
+
+ return PPCISD::CALL;
+}
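
The CALL vs. CALL_NOP choice above delegates to callsShareTOCBase, an existing helper in this file that the hunk does not show. A simplified, hypothetical sketch of the core question it answers (the real helper is more conservative, e.g. about weak and interposable definitions):

    // Hypothetical simplification (mayShareTOCBase is not a helper in this
    // file): two functions can share a TOC base only when the callee is
    // known to resolve within the caller's own module/DSO.
    static bool mayShareTOCBase(const Function *Caller, SDValue Callee,
                                const TargetMachine &TM) {
      const auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (!G)
        return false; // Unknown callee: assume the TOC may change.
      return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());
    }
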
+
+static bool isValidAIXExternalSymSDNode(StringRef SymName) {
+ return StringSwitch<bool>(SymName)
+ .Cases("__divdi3", "__fixunsdfdi", "__floatundidf", "__floatundisf",
+ "__moddi3", "__udivdi3", "__umoddi3", true)
+ .Cases("ceil", "floor", "memcpy", "memmove", "memset", "round", true)
+ .Default(false);
+}
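
isValidAIXExternalSymSDNode uses llvm::StringSwitch (llvm/ADT/StringSwitch.h), where each Cases group maps several keys to one value and Default supplies the fallback. A standalone usage sketch with a hypothetical helper name:

    #include "llvm/ADT/StringSwitch.h"

    // Matches a handful of runtime-library names, defaulting to false, in
    // the same style as isValidAIXExternalSymSDNode above.
    static bool isMemRoutine(llvm::StringRef Name) {
      return llvm::StringSwitch<bool>(Name)
          .Cases("memcpy", "memmove", "memset", true)
          .Default(false);
    }
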
+
+static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
+ const SDLoc &dl, const PPCSubtarget &Subtarget) {
+ if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ return SDValue(Dest, 0);
+
+ // Returns true if the callee is local, and false otherwise.
+ auto isLocalCallee = [&]() {
+ const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+
+ return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
+ !dyn_cast_or_null<GlobalIFunc>(GV);
+ };
+
+ // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
+ // a static relocation model causes some versions of GNU LD (2.17.50, at
+ // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
+ // built with secure-PLT.
+ bool UsePlt =
+ Subtarget.is32BitELFABI() && !isLocalCallee() &&
+ Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
+
+ // On AIX, direct function calls reference the symbol for the function's
+ // entry point, which is named by prepending a "." before the function's
+ // C-linkage name.
+ const auto getAIXFuncEntryPointSymbolSDNode =
+ [&](StringRef FuncName, bool IsDeclaration,
+ const XCOFF::StorageClass &SC) {
+ auto &Context = DAG.getMachineFunction().getMMI().getContext();
+
+ MCSymbolXCOFF *S = cast<MCSymbolXCOFF>(
+ Context.getOrCreateSymbol(Twine(".") + Twine(FuncName)));
+
+ if (IsDeclaration && !S->hasContainingCsect()) {
+ // On AIX, an undefined symbol needs to be associated with a
+ // MCSectionXCOFF to get the correct storage mapping class.
+ // In this case, XCOFF::XMC_PR.
+ MCSectionXCOFF *Sec = Context.getXCOFFSection(
+ S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, SC,
+ SectionKind::getMetadata());
+ S->setContainingCsect(Sec);
+ }
+
+ MVT PtrVT =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ return DAG.getMCSymbol(S, PtrVT);
+ };
+
+ if (isFunctionGlobalAddress(Callee)) {
+ const GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G->getGlobal();
+
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
+ const GlobalObject *GO = cast<GlobalObject>(GV);
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ return getAIXFuncEntryPointSymbolSDNode(GO->getName(), GO->isDeclaration(),
+ SC);
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *SymName = S->getSymbol();
+ if (!Subtarget.isAIXABI())
+ return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
+ UsePlt ? PPCII::MO_PLT : 0);
+
+ // If there exists a user-declared function whose name is the same as the
+ // ExternalSymbol's, then we pick up the user-declared version.
+ const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
+ if (const Function *F =
+ dyn_cast_or_null<Function>(Mod->getNamedValue(SymName))) {
+ const XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(F);
+ return getAIXFuncEntryPointSymbolSDNode(F->getName(), F->isDeclaration(),
+ SC);
+ }
+
+ // TODO: Remove this when the support for ExternalSymbolSDNode is complete.
+ if (isValidAIXExternalSymSDNode(SymName)) {
+ return getAIXFuncEntryPointSymbolSDNode(SymName, true, XCOFF::C_EXT);
+ }
+
+ report_fatal_error("Unexpected ExternalSymbolSDNode: " + Twine(SymName));
+ }
+
+ // No transformation needed.
+ assert(Callee.getNode() && "What, no callee?");
+ return Callee;
+}
+
+static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
+ assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
+ "Expected a CALLSEQ_STARTSDNode.");
+
+ // The last operand is the chain, except when the node has glue. If the node
+ // has glue, then the last operand is the glue, and the chain is the second
+ // last operand.
+ SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
+ if (LastValue.getValueType() != MVT::Glue)
+ return LastValue;
+
+ return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
+}
+
+// Creates the node that moves a function's address into the count register
+// to prepare for an indirect call instruction.
+static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ const SDLoc &dl) {
+ SDValue MTCTROps[] = {Chain, Callee, Glue};
+ EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
+ makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
+ // The glue is the second value produced.
+ Glue = Chain.getValue(1);
+}
+
+static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
+ SDValue &Glue, SDValue &Chain,
+ SDValue CallSeqStart,
+ ImmutableCallSite CS, const SDLoc &dl,
+ bool hasNest,
+ const PPCSubtarget &Subtarget) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // The loads are scheduled at the beginning of the call sequence, and the
+ // register copies are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // copies together, a TOC access in the caller could be scheduled between
+ // the assignment of the callee TOC and the branch to the callee, which leads
+ // to incorrect code.
+
+ // Start by loading the function address from the descriptor.
+ SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
+ auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
+ ? (MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant)
+ : MachineMemOperand::MONone;
+
+ MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
+
+ // Registers used in building the DAG.
+ const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
+ const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
+
+ // Offsets of descriptor members.
+ const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
+ const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
+
+ const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
+
+ // One load for the function's entry point address.
+ SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
+ Alignment, MMOFlags);
+
+ // One for loading the TOC anchor for the module that contains the called
+ // function.
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
+ SDValue TOCPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddTOC,
+ MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
+
+ // One for loading the environment pointer.
+ SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
+ SDValue LoadEnvPtr =
+ DAG.getLoad(RegVT, dl, LDChain, AddPtr,
+ MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
+
+ // Then copy the newly loaded TOC anchor to the TOC pointer.
+ SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
+ Chain = TOCVal.getValue(0);
+ Glue = TOCVal.getValue(1);
+
+ // If the function call has an explicit 'nest' parameter, it takes the
+ // place of the environment pointer.
+ assert((!hasNest || !Subtarget.isAIXABI()) &&
+ "Nest parameter is not supported on AIX.");
+ if (!hasNest) {
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
+ Chain = EnvVal.getValue(0);
+ Glue = EnvVal.getValue(1);
+ }
+
+ // The rest of the indirect call sequence is the same as the non-descriptor
+ // DAG.
+ prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
+}
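
The three loads built above walk a function descriptor; a sketch of its layout under the 64-bit ELFv1 ABI, matching the TOCAnchorOffset/EnvPtrOffset fields used in the code (AIX uses the same three slots, pointer-sized, so 4-byte fields in 32-bit mode):

    // Layout of an ELFv1/AIX function descriptor as the loads above see it.
    // Offsets on 64-bit targets: 0, 8 (TOCAnchorOffset), 16 (EnvPtrOffset).
    struct FunctionDescriptor {
      void *EntryPoint; // Loaded and moved into CTR for the bctrl.
      void *TOCBase;    // Copied into the TOC register (r2/X2) before the call.
      void *EnvPtr;     // Copied into r11/X11 unless a 'nest' argument uses it.
    };
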
+
+static void
+buildCallOperands(SmallVectorImpl<SDValue> &Ops, CallingConv::ID CallConv,
+ const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+ SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
+ const PPCSubtarget &Subtarget, bool isIndirect) {
+ const bool IsPPC64 = Subtarget.isPPC64();
+ // MVT for a general purpose register.
+ const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+
+ // First operand is always the chain.
+ Ops.push_back(Chain);
+
+ // If it's a direct call pass the callee as the second operand.
+ if (!isIndirect)
+ Ops.push_back(Callee);
+ else {
+ assert(!isPatchPoint && "Patch point calls are not indirect.");
+
+ // For the TOC-based ABIs, we have saved the TOC pointer to the linkage area
+ // on the stack (this would have been done in `LowerCall_64SVR4` or
+ // `LowerCall_AIX`). The call instruction is a pseudo instruction that
+ // represents both the indirect branch and a load that restores the TOC
+ // pointer from the linkage area. The operand for the TOC restore is an add
+ // of the TOC save offset to the stack pointer. This must be the second
+ // operand: after the chain input but before any other variadic arguments.
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
+ unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
+ Ops.push_back(AddTOC);
+ }
+
+ // Add the register used for the environment pointer.
+ if (Subtarget.usesFunctionDescriptors() && !hasNest)
+ Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
+ RegVT));
+
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
+ }
+
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
+ // no way to mark dependencies as implicit here.
+ // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
+ if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
+ Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ if (isVarArg && Subtarget.is32BitELFABI())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
- // When performing tail call optimization the callee pops its arguments off
- // the stack. Account for this here so these bytes can be pushed back on in
- // PPCFrameLowering::eliminateCallFramePseudoInstr.
- int BytesCalleePops =
- (CallConv == CallingConv::Fast &&
- getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
-
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
@@ -5236,8 +5416,40 @@ SDValue PPCTargetLowering::FinishCall(
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ // If the glue is valid, it is the last operand.
+ if (Glue.getNode())
+ Ops.push_back(Glue);
+}
+
+SDValue PPCTargetLowering::FinishCall(
+ CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
+ bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
+ SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
+ unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
+
+ if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI())
+ setUsesTOCBasePtr(DAG);
+
+ const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint);
+ unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall,
+ DAG.getMachineFunction().getFunction(),
+ Callee, Subtarget, DAG.getTarget());
+
+ if (!isIndirect)
+ Callee = transformCallee(Callee, DAG, dl, Subtarget);
+ else if (Subtarget.usesFunctionDescriptors())
+ prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS,
+ dl, hasNest, Subtarget);
+ else
+ prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
+
+ // Build the operand list for the call instruction.
+ SmallVector<SDValue, 8> Ops;
+ buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint,
+ hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff,
+ Subtarget, isIndirect);
// Emit tail call.
if (isTailCall) {
@@ -5246,81 +5458,32 @@ SDValue PPCTargetLowering::FinishCall(
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee)) &&
- "Expecting an global address, external symbol, absolute value or register");
-
+ "Expecting a global address, external symbol, absolute value or "
+ "register");
+ assert(CallOpc == PPCISD::TC_RETURN &&
+ "Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
+ return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
}
- // Add a NOP immediately after the branch instruction when using the 64-bit
- // SVR4 or the AIX ABI.
- // At link time, if caller and callee are in a different module and
- // thus have a different TOC, the call will be replaced with a call to a stub
- // function which saves the current TOC, loads the TOC of the callee and
- // branches to the callee. The NOP will be replaced with a load instruction
- // which restores the TOC of the caller from the TOC save slot of the current
- // stack frame. If caller and callee belong to the same module (and have the
- // same TOC), the NOP will remain unchanged, or become some other NOP.
-
- MachineFunction &MF = DAG.getMachineFunction();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- if (!isTailCall && !isPatchPoint &&
- ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
- Subtarget.isAIXABI())) {
- if (CallOpc == PPCISD::BCTRL) {
- if (Subtarget.isAIXABI())
- report_fatal_error("Indirect call on AIX is not implemented.");
-
- // This is a call through a function pointer.
- // Restore the caller TOC from the save area into R2.
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
- // We are using a target-specific load with r2 hard coded, because the
- // result of a target-independent load would never go directly into r2,
- // since r2 is a reserved register (which prevents the register allocator
- // from allocating it), resulting in an additional register being
- // allocated and an unnecessary move instruction being generated.
- CallOpc = PPCISD::BCTRL_LOAD_TOC;
-
- SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
- unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
- SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
- SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
+ Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
+ Glue = Chain.getValue(1);
- // The address needs to go after the chain input but before the flag (or
- // any other variadic arguments).
- Ops.insert(std::next(Ops.begin()), AddTOC);
- } else if (CallOpc == PPCISD::CALL &&
- !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
- // Otherwise insert NOP for non-local calls.
- CallOpc = PPCISD::CALL_NOP;
- }
- }
-
- if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
- // On AIX, direct function calls reference the symbol for the function's
- // entry point, which is named by inserting a "." before the function's
- // C-linkage name.
- GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
- auto &Context = DAG.getMachineFunction().getMMI().getContext();
- MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
- Twine(G->getGlobal()->getName()));
- Callee = DAG.getMCSymbol(S, PtrVT);
- // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
- Ops[1] = Callee;
- }
-
- Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops = (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt)
+ ? NumBytes
+ : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
DAG.getIntPtrConstant(BytesCalleePops, dl, true),
- InFlag, dl);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
+ Glue, dl);
+ Glue = Chain.getValue(1);
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals);
}
SDValue
@@ -6273,8 +6436,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
- // See PrepareCall() for more information about calls through function
- // pointers in the 64-bit SVR4 ABI.
+ // See prepareDescriptorIndirectCall and buildCallOperands for more
+ // information about calls through function pointers in the 64-bit SVR4 ABI.
if (!isTailCall && !isPatchPoint &&
!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
@@ -6695,6 +6858,205 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
+static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State) {
+
+ if (ValVT == MVT::f128)
+ report_fatal_error("f128 is unimplemented on AIX.");
+
+ if (ArgFlags.isByVal())
+ report_fatal_error("Passing structure by value is unimplemented.");
+
+ if (ArgFlags.isNest())
+ report_fatal_error("Nest arguments are unimplemented.");
+
+ if (ValVT.isVector() || LocVT.isVector())
+ report_fatal_error("Vector arguments are unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
+ State.getMachineFunction().getSubtarget());
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ static const MCPhysReg GPR_32[] = {// 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10};
+ static const MCPhysReg GPR_64[] = {// 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+
+ // Arguments always reserve space in the parameter save area.
+ switch (ValVT.SimpleTy) {
+ default:
+ report_fatal_error("Unhandled value type for argument.");
+ case MVT::i64:
+ // i64 arguments should have been split to i32 for PPC32.
+ assert(IsPPC64 && "PPC32 should have split i64 values.");
+ LLVM_FALLTHROUGH;
+ case MVT::i1:
+ case MVT::i32:
+ State.AllocateStack(PtrByteSize, PtrByteSize);
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ // Promote integers if needed.
+ if (ValVT.getSizeInBits() < RegVT.getSizeInBits())
+ LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
+ : CCValAssign::LocInfo::ZExt;
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ }
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ return false;
+
+ case MVT::f32:
+ case MVT::f64: {
+ // Parameter save area (PSA) is reserved even if the float is passed in an FPR.
+ const unsigned StoreSize = LocVT.getStoreSize();
+ // Floats are always 4-byte aligned in the PSA on AIX.
+ // This includes f64 in 64-bit mode for ABI compatibility.
+ State.AllocateStack(IsPPC64 ? 8 : StoreSize, 4);
+ if (unsigned Reg = State.AllocateReg(FPR))
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+
+ // AIX requires that GPRs are reserved for float arguments.
+ // Successfully reserved GPRs are only initialized for vararg calls.
+ MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
+ for (unsigned I = 0; I < StoreSize; I += PtrByteSize) {
+ if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
+ if (State.isVarArg()) {
+ // Custom handling is required for:
+ // f64 in PPC32 needs to be split into 2 GPRs.
+ // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
+ }
+ } else if (State.isVarArg()) {
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ return false;
+ }
+ }
+ return true;
+}
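
The trailing GPR-allocation loop implements the AIX shadowing rule: a floating-point argument consumes its bytes of the parameter save area and the corresponding GPRs even when it travels in an FPR. A hypothetical worked example:

    // Hypothetical illustration of the shadowing rule the loop above
    // implements, for this signature on 32-bit AIX:
    //   A: reserves PSA bytes 0-7 and travels in f1; GPRs r3 and r4 are
    //      consumed (and only initialized with A's bits for vararg calls).
    //   B: reserves PSA bytes 8-11 and lands in r5 -- not r3 -- because the
    //      earlier float already shadowed r3 and r4.
    void f(double A, int B);
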
+
+static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
+ bool IsPPC64) {
+ assert((IsPPC64 || SVT != MVT::i64) &&
+ "i64 should have been split for 32-bit codegen.");
+
+ switch (SVT) {
+ default:
+ report_fatal_error("Unexpected value type for formal argument");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ case MVT::f32:
+ return &PPC::F4RCRegClass;
+ case MVT::f64:
+ return &PPC::F8RCRegClass;
+ }
+}
+
+static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
+ SelectionDAG &DAG, SDValue ArgValue,
+ MVT LocVT, const SDLoc &dl) {
+ assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
+ assert(ValVT.getSizeInBits() < LocVT.getSizeInBits());
+
+ if (Flags.isSExt())
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+ else if (Flags.isZExt())
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+ DAG.getValueType(ValVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
+}
+
+SDValue PPCTargetLowering::LowerFormalArguments_AIX(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) &&
+ "Unexpected calling convention!");
+
+ if (isVarArg)
+ report_fatal_error("This call type is unimplemented on AIX.");
+
+ if (getTargetMachine().Options.GuaranteedTailCallOpt)
+ report_fatal_error("Tail call support is unimplemented on AIX.");
+
+ if (useSoftFloat())
+ report_fatal_error("Soft float support is unimplemented on AIX.");
+
+ const PPCSubtarget &Subtarget =
+ static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX support is not supported on AIX.");
+
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ MachineFunction &MF = DAG.getMachineFunction();
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // On AIX a minimum of 8 words is saved to the parameter save area.
+ const unsigned MinParameterSaveArea = 8 * PtrByteSize;
+ CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue;
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (VA.isRegLoc()) {
+ EVT ValVT = VA.getValVT();
+ MVT LocVT = VA.getLocVT();
+ MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+ unsigned VReg =
+ MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+ ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+ if (ValVT.isScalarInteger() &&
+ (ValVT.getSizeInBits() < LocVT.getSizeInBits())) {
+ ArgValue =
+ truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
+ }
+ InVals.push_back(ArgValue);
+ } else {
+ report_fatal_error("Handling of formal arguments on the stack is "
+ "unimplemented!");
+ }
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCInfo.getNextStackOffset();
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so
+ // that taking the difference between two stack areas will result in an
+ // aligned stack.
+ MinReservedArea =
+ EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ return Chain;
+}
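
The stack reservation above is LinkageSize plus the 8-pointer minimum parameter save area; a constexpr sketch of the arithmetic, following the six-slot [SP][CR][LR][2 x reserved][TOC] layout described in LowerCall_AIX below (illustrative only):

    #include <cstdint>

    // Illustrative only: six linkage-area slots on AIX, each 4 bytes on
    // PPC32 and 8 bytes on PPC64 (the 24/48-byte LinkageSize), followed by
    // the minimum 8-pointer parameter save area.
    constexpr unsigned linkageSize(bool IsPPC64) {
      const unsigned Slot = IsPPC64 ? 8 : 4;
      return 6 * Slot; // [SP][CR][LR][reserved][reserved][TOC]
    }
    constexpr unsigned minStackReserved(bool IsPPC64) {
      const unsigned Ptr = IsPPC64 ? 8 : 4;
      return linkageSize(IsPPC64) + 8 * Ptr; // + MinParameterSaveArea
    }
    static_assert(minStackReserved(false) == 56, "24 + 32 on PPC32");
    static_assert(minStackReserved(true) == 112, "48 + 64 on PPC64");
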
SDValue PPCTargetLowering::LowerCall_AIX(
SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
@@ -6705,22 +7067,33 @@ SDValue PPCTargetLowering::LowerCall_AIX(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const {
- assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
- "Unimplemented calling convention!");
- if (isVarArg || isPatchPoint)
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::Fast) && "Unexpected calling convention!");
+
+ if (isPatchPoint)
report_fatal_error("This call type is unimplemented on AIX.");
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- bool isPPC64 = PtrVT == MVT::i64;
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned NumOps = Outs.size();
+ const PPCSubtarget& Subtarget =
+ static_cast<const PPCSubtarget&>(DAG.getSubtarget());
+ if (Subtarget.hasQPX())
+ report_fatal_error("QPX is not supported on AIX.");
+ if (Subtarget.hasAltivec())
+ report_fatal_error("Altivec support is unimplemented on AIX.");
+ MachineFunction &MF = DAG.getMachineFunction();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, parameter list area.
- // On XCOFF, we start with 24/48, which is reserved space for
- // [SP][CR][LR][2 x reserved][TOC].
- unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ // Reserve space for the linkage save area (LSA) on the stack.
+ // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
+ // [SP][CR][LR][2 x reserved][TOC].
+ // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
+ const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+ const bool IsPPC64 = Subtarget.isPPC64();
+ const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+ CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if the callee
@@ -6728,98 +7101,101 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- unsigned NumBytes = LinkageSize + 8 * PtrByteSize;
+ const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
+ const unsigned NumBytes = LinkageSize + MinParameterSaveAreaSize;
// Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog
- // inserter pass.
+ // These operations are automatically eliminated by the prolog/epilog pass.
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
- static const MCPhysReg GPR_32[] = { // 32-bit registers.
- PPC::R3, PPC::R4, PPC::R5, PPC::R6,
- PPC::R7, PPC::R8, PPC::R9, PPC::R10
- };
- static const MCPhysReg GPR_64[] = { // 64-bit registers.
- PPC::X3, PPC::X4, PPC::X5, PPC::X6,
- PPC::X7, PPC::X8, PPC::X9, PPC::X10
- };
-
- const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
- : array_lengthof(GPR_32);
- const unsigned NumFPRs = array_lengthof(FPR);
- assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "
- "on AIX");
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
- unsigned GPR_idx = 0, FPR_idx = 0;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
+ CCValAssign &VA = ArgLocs[I++];
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ if (VA.isMemLoc())
+ report_fatal_error("Handling of placing parameters on the stack is "
+ "unimplemented!");
+ if (!VA.isRegLoc())
+ report_fatal_error(
+ "Unexpected non-register location for function call argument.");
- if (isTailCall)
- report_fatal_error("Handling of tail call is unimplemented!");
- int SPDiff = 0;
+ SDValue Arg = OutVals[VA.getValNo()];
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ if (!VA.needsCustom()) {
+ switch (VA.getLocInfo()) {
+ default:
+ report_fatal_error("Unexpected argument extension type.");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- // Promote integers if needed.
- if (Arg.getValueType() == MVT::i1 ||
- (isPPC64 && Arg.getValueType() == MVT::i32)) {
- unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);
+ continue;
}
- // Note: "by value" is code for passing a structure by value, not
- // basic types.
- if (Flags.isByVal())
- report_fatal_error("Passing structure by value is unimplemented!");
+ // Custom handling is used for GPR initializations for vararg float
+ // arguments.
+ assert(isVarArg && VA.getValVT().isFloatingPoint() &&
+ VA.getLocVT().isInteger() &&
+ "Unexpected custom register handling for calling convention.");
- switch (Arg.getSimpleValueType().SimpleTy) {
- default: llvm_unreachable("Unexpected ValueType for argument!");
- case MVT::i1:
- case MVT::i32:
- case MVT::i64:
- if (GPR_idx != NumGPRs)
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
- else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
- RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+ SDValue ArgAsInt =
+ DAG.getBitcast(MVT::getIntegerVT(VA.getValVT().getSizeInBits()), Arg);
- // If we have any FPRs remaining, we may also have GPRs remaining.
- // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
- // GPRs.
- if (GPR_idx != NumGPRs)
- ++GPR_idx;
- if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
- ++GPR_idx;
- } else
- report_fatal_error("Handling of placing parameters on the stack is "
- "unimplemented!");
- break;
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v1i128:
- case MVT::f128:
- case MVT::v4f64:
- case MVT::v4i1:
- report_fatal_error("Handling of this parameter type is unimplemented!");
- }
- }
+ if (Arg.getValueType().getStoreSize() == VA.getLocVT().getStoreSize())
+ // f32 in 32-bit GPR
+ // f64 in 64-bit GPR
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
+ else if (Arg.getValueType().getSizeInBits() < VA.getLocVT().getSizeInBits())
+ // f32 in 64-bit GPR.
+ RegsToPass.push_back(std::make_pair(
+ VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, VA.getLocVT())));
+ else {
+ // f64 in two 32-bit GPRs
+ // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
+ assert(Arg.getValueType() == MVT::f64 && isVarArg && !IsPPC64 &&
+ "Unexpected custom register for argument!");
+ CCValAssign &GPR1 = VA;
+ SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
+ DAG.getConstant(32, dl, MVT::i8));
+ RegsToPass.push_back(std::make_pair(
+ GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
+ assert(I != E && "A second custom GPR is expected!");
+ CCValAssign &GPR2 = ArgLocs[I++];
+ assert(GPR2.isRegLoc() && GPR2.getValNo() == GPR1.getValNo() &&
+ GPR2.needsCustom() && "A second custom GPR is expected!");
+ RegsToPass.push_back(std::make_pair(
+ GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
+ }
+ }
+
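The custom-handled case above can be pictured outside SelectionDAG; a minimal host-side sketch of how a vararg f64 lands in two 32-bit GPRs (the helper and struct names are invented for illustration):

    #include <cstdint>
    #include <cstring>

    // Bitcast the double to i64; the most-significant word goes to the
    // first (lower-numbered) GPR and the least-significant word to the
    // next, mirroring the SRL-by-32 plus truncate sequence above.
    struct GprPair { uint32_t MSW, LSW; };

    GprPair splitVarargDouble(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);       // the ISD::BITCAST step
      return {static_cast<uint32_t>(Bits >> 32), // SRL + trunc -> first GPR
              static_cast<uint32_t>(Bits)};      // trunc       -> second GPR
    }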
+ // For indirect calls, we need to save the TOC base to the stack for
+ // restoration after the call.
+ if (!isTailCall && !isPatchPoint &&
+ !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+ const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
+ const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+ const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ const unsigned TOCSaveOffset =
+ Subtarget.getFrameLowering()->getTOCSaveOffset();
- if (!isFunctionGlobalAddress(Callee) &&
- !isa<ExternalSymbolSDNode>(Callee))
- report_fatal_error("Handling of indirect call is unimplemented!");
+ setUsesTOCBasePtr(DAG);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+ SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
+ }
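As a rough picture of the slot being written, the offsets below are assumptions about the usual AIX linkage layout, where the TOC save slot follows five pointer-sized fields; the authoritative value is whatever getTOCSaveOffset() returns:

    // Hypothetical stand-in for the frame lowering's getTOCSaveOffset():
    // backchain, CR save, LR save, and two reserved slots precede the TOC
    // slot in the assumed AIX linkage area.
    unsigned assumedTOCSaveOffset(bool IsPPC64) {
      const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
      return 5 * PtrByteSize;  // 20 bytes on 32-bit, 40 bytes on 64-bit
    }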
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -6829,10 +7205,11 @@ SDValue PPCTargetLowering::LowerCall_AIX(
InFlag = Chain.getValue(1);
}
+ const int SPDiff = 0;
return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
- /* unused except on PPC64 ELFv1 */ false, DAG,
- RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
- NumBytes, Ins, InVals, CS);
+ /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass,
+ InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins,
+ InVals, CS);
}
bool
@@ -7121,8 +7498,7 @@ SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
"Custom lowering only for i1 results");
SDLoc DL(Op);
- return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
- Op.getOperand(0));
+ return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
}
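In scalar terms, the node emitted here keeps only the low bit and reads the result out of CR0's GT field (andi. with 1 produces a positive result exactly when bit 0 is set). A plain C++ rendering of the value semantics only:

    // Value semantics of the i1 truncate; the real lowering also records
    // the implicit compare-against-zero in CR0 and reads its GT bit.
    bool truncateToI1(uint64_t X) { return (X & 1u) != 0; }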
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
@@ -7188,17 +7564,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
+ bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+ bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
// We might be able to do better than this under some circumstances, but in
// general, fsel-based lowering of select is a finite-math-only optimization.
// For more information, see section F.3 of the 2.06 ISA specification.
- if (!DAG.getTarget().Options.NoInfsFPMath ||
- !DAG.getTarget().Options.NoNaNsFPMath)
+ // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+ // presence of infinities.
+ if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
return Op;
- // TODO: Propagate flags from the select rather than global settings.
- SDNodeFlags Flags;
- Flags.setNoInfs(true);
- Flags.setNoNaNs(true);
-
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
@@ -7207,6 +7581,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
SDLoc dl(Op);
+ if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+ switch (CC) {
+ default:
+ // Not a min/max but with finite math, we may still be able to use fsel.
+ if (HasNoInfs && HasNoNaNs)
+ break;
+ return Op;
+ case ISD::SETOGT:
+ case ISD::SETGT:
+ return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETLT:
+ return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+ }
+ }
+
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
+
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
SDValue Sel1;
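The new P9 path above triggers when the select operands are exactly the compare operands. At the source level (illustrative C++, with the caveat that the type-C instructions follow the C ternary's NaN behavior rather than IEEE maxnum/minnum semantics):

    // On POWER9 these may lower to xsmaxcdp/xsmincdp even when infinities
    // are possible, per the comment above.
    double maxc(double A, double B) { return A > B ? A : B; }  // SETGT form
    double minc(double A, double B) { return A < B ? A : B; }  // SETLT form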
@@ -8055,8 +8450,6 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, const SDLoc &dl) {
- assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
-
static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -8376,29 +8769,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// We have XXSPLTIB for constant splats one byte wide
- if (Subtarget.hasP9Vector() && SplatSize == 1) {
- // This is a splat of 1-byte elements with some elements potentially undef.
- // Rather than trying to match undef in the SDAG patterns, ensure that all
- // elements are the same constant.
- if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
- SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
- dl, MVT::i32));
- SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(), NewBV);
- return NewBV;
- }
-
- // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
- // detect that constant splats like v8i16: 0xABAB are really just splats
- // of a 1-byte constant. In this case, we need to convert the node to a
- // splat of v16i8 and a bitcast.
- if (Op.getValueType() != MVT::v16i8)
- return DAG.getBitcast(Op.getValueType(),
- DAG.getConstant(SplatBits, dl, MVT::v16i8));
-
- return Op;
- }
+ // FIXME: SplatBits is an unsigned int being cast to an int while passing it
+  // as an argument to BuildSplatI. Given SplatSize == 1 it is okay here.
+ if (Subtarget.hasP9Vector() && SplatSize == 1)
+ return BuildSplatI(SplatBits, SplatSize, Op.getValueType(), DAG, dl);
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
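The expression that begins on the last line above continues past this hunk; standalone, the computation left-justifies the splat constant in 32 bits and arithmetic-shifts it back so the element's top bit sign-extends (a sketch assuming the usual shift-back by the same amount):

    #include <cstdint>

    // Recover the sign-extended splat constant from its raw bits and width.
    int32_t sextSplatValue(uint32_t SplatBits, unsigned SplatBitSize) {
      const unsigned Shift = 32 - SplatBitSize;
      return int32_t(SplatBits << Shift) >> Shift; // e.g. 0xF0, 8 bits -> -16
    }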
@@ -8930,19 +9304,19 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasP9Vector()) {
if (PPC::isXXBRHShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
- SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
+ SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
} else if (PPC::isXXBRWShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
- SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
+ SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
} else if (PPC::isXXBRDShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
- SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
+ SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
} else if (PPC::isXXBRQShuffleMask(SVOp)) {
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
- SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
+ SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
}
}
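Replacing the target-specific XXREVERSE with generic ISD::BSWAP works because each XXBRx pattern is a per-element byte reversal at a fixed width. The scalar analogue at halfword width, for illustration:

    #include <cstdint>

    // What XXBRH does to each of the eight v8i16 lanes: a 16-bit byte swap.
    uint16_t bswap16(uint16_t V) {
      return static_cast<uint16_t>((V >> 8) | (V << 8));
    }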
@@ -9503,7 +9877,7 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
Op.getOperand(0));
// XXBRD
- Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
// MFVSRD
int VectorIndex = 0;
if (Subtarget.isLittleEndian())
@@ -10845,9 +11219,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
- } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_CC_F4 ||
+ } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
MI.getOpcode() == PPC::SELECT_CC_F16 ||
MI.getOpcode() == PPC::SELECT_CC_QFRC ||
@@ -10859,8 +11231,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_VSRC ||
MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
MI.getOpcode() == PPC::SELECT_CC_SPE ||
- MI.getOpcode() == PPC::SELECT_I4 ||
- MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
MI.getOpcode() == PPC::SELECT_F16 ||
@@ -11397,28 +11767,28 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
- } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
- unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
- MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
- ? PPC::ANDIo8
- : PPC::ANDIo;
- bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
- MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+ } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
+ unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
+ MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
+ ? PPC::ANDI8_rec
+ : PPC::ANDI_rec;
+ bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
+ MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
MachineRegisterInfo &RegInfo = F->getRegInfo();
Register Dest = RegInfo.createVirtualRegister(
- Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
+ Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
- DebugLoc dl = MI.getDebugLoc();
- BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ DebugLoc Dl = MI.getDebugLoc();
+ BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
.addReg(MI.getOperand(1).getReg())
.addImm(1);
- BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
- .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
} else if (MI.getOpcode() == PPC::TCHECK_RET) {
DebugLoc Dl = MI.getDebugLoc();
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -11638,7 +12008,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are two or more FDIVs (for embedded cores with only
// one FP pipeline) or three or more FDIVs (for generic OOO cores).
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default:
return 3;
case PPC::DIR_440:
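The threshold returned here gates the standard repeated-division combine; in source terms the rewrite looks like this (a sketch of the effect, valid only under unsafe/reassociable FP math):

    // Before: N divides by the same D. After: one divide plus N multiplies.
    void scaleAll(double *X, int N, double D) {
      const double Recip = 1.0 / D;  // single fdiv
      for (int I = 0; I < N; ++I)
        X[I] *= Recip;               // an fmul replaces each fdiv
    }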
@@ -14111,7 +14481,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
}
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
- switch (Subtarget.getDarwinDirective()) {
+ switch (Subtarget.getCPUDirective()) {
default: break;
case PPC::DIR_970:
case PPC::DIR_PWR4:
@@ -14121,7 +14491,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
case PPC::DIR_PWR8:
- case PPC::DIR_PWR9: {
+ case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -14309,6 +14680,17 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSFRCRegClass);
}
+  // If we name a VSX register, we can't defer to the base class because it
+  // will not recognize the correct register (the underlying registers are
+  // named VSL{0-31} and V{0-31}, so a "vs" name won't match). Match it here.
+ if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
+ int VSNum = atoi(Constraint.data() + 3);
+ assert(VSNum >= 0 && VSNum <= 63 &&
+ "Attempted to access a vsr out of range");
+ if (VSNum < 32)
+ return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
+ return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
+ }
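Standalone, the mapping implemented above looks like the sketch below. Note the constraint text includes the surrounding braces (e.g. "{vs34}"), hence the +3 offset in the real code; this illustrative helper takes the bare name instead:

    #include <cassert>
    #include <cstdlib>

    // "vs0".."vs31" live in the VSL bank; "vs32".."vs63" alias the Altivec
    // V registers, so they map to V0..V31.
    unsigned vsrToBankIndex(const char *Name, bool &IsAltivecBank) {
      const int VSNum = std::atoi(Name + 2);  // skip the "vs" prefix
      assert(VSNum >= 0 && VSNum <= 63 && "vsr out of range");
      IsAltivecBank = VSNum >= 32;
      return IsAltivecBank ? unsigned(VSNum - 32) : unsigned(VSNum);
    }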
std::pair<unsigned, const TargetRegisterClass *> R =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
@@ -14513,16 +14895,15 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
+Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
bool isPPC64 = Subtarget.isPPC64();
bool IsDarwinABI = Subtarget.isDarwinABI();
- if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
- (!isPPC64 && VT != MVT::i32))
+ bool is64Bit = isPPC64 && VT == LLT::scalar(64);
+ if (!is64Bit && VT != LLT::scalar(32))
report_fatal_error("Invalid register global variable type");
- bool is64Bit = isPPC64 && VT == MVT::i64;
Register Reg = StringSwitch<Register>(RegName)
.Case("r1", is64Bit ? PPC::X1 : PPC::R1)
.Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
@@ -14870,6 +15251,9 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
+ if (VT.isFloatingPoint() && !Subtarget.allowsUnalignedFPAccess())
+ return false;
+
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
@@ -14889,7 +15273,8 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return true;
}
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -15278,7 +15663,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
return SDValue();
auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
- switch (this->Subtarget.getDarwinDirective()) {
+ switch (this->Subtarget.getCPUDirective()) {
default:
// TODO: enhance the condition for subtarget before pwr8
return false;
@@ -15288,6 +15673,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
// vector 7 2 2
return true;
case PPC::DIR_PWR9:
+ case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
// vector 7 2 2
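The cost table above decides when a multiply by (2^N + 1) or (2^N - 1) is worth decomposing; the scalar rewrite being priced, for illustration:

    // mul x, 9 -> (x << 3) + x   (the IsAddOne case)
    // mul x, 7 -> (x << 3) - x   (the !IsAddOne case)
    long mulBy9(long X) { return (X << 3) + X; }
    long mulBy7(long X) { return (X << 3) - X; }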
@@ -15357,12 +15743,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
- // If tail calls are disabled for the caller then we are done.
- const Function *Caller = CI->getParent()->getParent();
- auto Attr = Caller->getFnAttribute("disable-tail-calls");
- if (Attr.getValueAsString() == "true")
- return false;
-
// If sibling calls have been disabled and tail-calls aren't guaranteed
// there is no reason to duplicate.
auto &TM = getTargetMachine();
@@ -15375,6 +15755,7 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return false;
// Make sure the callee and caller calling conventions are eligible for tco.
+ const Function *Caller = CI->getParent()->getParent();
if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
CI->getCallingConv()))
return false;