Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp  583
1 file changed, 416 insertions(+), 167 deletions(-)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 26e9f13f9ff4..1e3e14c71144 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -47,7 +47,6 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -83,6 +82,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -111,6 +111,9 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
+static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
+cl::desc("enable quad precision float support on ppc"), cl::Hidden);
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
@@ -134,8 +137,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
if (!useSoftFloat()) {
- addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
- addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ if (hasSPE()) {
+ addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+ } else {
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ }
}
// Match BITREVERSE to customized fast code sequence in the td file.
@@ -159,15 +167,26 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
- setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
- setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
- setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+ if (!Subtarget.hasSPE()) {
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
+ }
+
+ // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::ADDC, VT, Legal);
+ setOperationAction(ISD::ADDE, VT, Legal);
+ setOperationAction(ISD::SUBC, VT, Legal);
+ setOperationAction(ISD::SUBE, VT, Legal);
+ }
if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
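For context on the ADDC/ADDE/SUBC/SUBE block just added: those nodes model word-sized adds/subs where the low operation produces a carry and the high operation consumes it. A minimal standalone C++ sketch of that pattern (illustrative only; the names are invented here, not code from this patch):

    #include <cstdint>
    #include <cstdio>

    // Wide add from 32-bit halves: the low add plays the role of ADDC
    // (produces a carry out), the high add plays the role of ADDE
    // (consumes the carry in).
    static uint64_t add64_from_halves(uint64_t a, uint64_t b) {
      uint32_t lo = (uint32_t)a + (uint32_t)b;                        // ADDC
      uint32_t carry = lo < (uint32_t)a;                              // carry out of the low word
      uint32_t hi = (uint32_t)(a >> 32) + (uint32_t)(b >> 32) + carry; // ADDE
      return ((uint64_t)hi << 32) | lo;
    }

    int main() {
      // 0xffffffff + 1 carries into the high word: prints 0x100000000.
      printf("0x%llx\n", (unsigned long long)add64_from_halves(0xffffffffu, 1));
    }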
@@ -201,9 +220,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
}
- // This is used in the ppcf128->int sequence. Note it has different semantics
- // from FP_ROUND: that rounds to nearest, this rounds to zero.
- setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available).
+ setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
@@ -253,13 +273,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FMA , MVT::f32, Legal);
+ if (Subtarget.hasSPE()) {
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
+ } else {
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
+ }
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
@@ -296,7 +321,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
// to speed up scalar BSWAP64.
- // CTPOP or CTTZ were introduced in P8/P9 respectivelly
+ // CTPOP or CTTZ were introduced in P8/P9 respectively
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
if (Subtarget.isISA3_0()) {
setOperationAction(ISD::BSWAP, MVT::i64 , Custom);
@@ -342,12 +367,19 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ if (Subtarget.hasSPE()) {
+ // SPE has built-in conversions
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ } else {
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- // PowerPC does not have [U|S]INT_TO_FP
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ }
if (Subtarget.hasDirectMove() && isPPC64) {
setOperationAction(ISD::BITCAST, MVT::f32, Legal);
@@ -445,6 +477,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Comparisons that require checking two conditions.
+ if (Subtarget.hasSPE()) {
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ }
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
@@ -472,7 +510,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ if (Subtarget.hasSPE())
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ else
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
}
// With the instructions enabled under FPCVT, we can do everything.
@@ -785,6 +826,46 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SHL, MVT::v1i128, Legal);
setOperationAction(ISD::SRL, MVT::v1i128, Legal);
setOperationAction(ISD::SRA, MVT::v1i128, Expand);
+
+ if (EnableQuadPrecision) {
+ addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
+ setOperationAction(ISD::FADD, MVT::f128, Legal);
+ setOperationAction(ISD::FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+ // No extending loads to f128 on PPC.
+ for (MVT FPT : MVT::fp_valuetypes())
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
+ setOperationAction(ISD::FMA, MVT::f128, Legal);
+ setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
+
+ setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
+ setOperationAction(ISD::FRINT, MVT::f128, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f128, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
+ setOperationAction(ISD::FROUND, MVT::f128, Legal);
+
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ // No implementation for these ops for PowerPC.
+ setOperationAction(ISD::FSIN , MVT::f128, Expand);
+ setOperationAction(ISD::FCOS , MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ }
+
}
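As a usage note for the EnableQuadPrecision block above, this is the kind of source the now-Legal f128 operations correspond to. A hedged sketch: __float128 is the GCC/Clang spelling assumed here, and the assumption is that a P9 target plus the -enable-ppc-quad-precision option is what routes such code through this lowering.

    // a * b + c exercises FMUL/FADD (or FMA) on MVT::f128.
    __float128 madd_f128(__float128 a, __float128 b, __float128 c) {
      return a * b + c;
    }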
if (Subtarget.hasP9Altivec()) {
@@ -1021,6 +1102,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ if (EnableQuadPrecision) {
+ setLibcallName(RTLIB::LOG_F128, "logf128");
+ setLibcallName(RTLIB::LOG2_F128, "log2f128");
+ setLibcallName(RTLIB::LOG10_F128, "log10f128");
+ setLibcallName(RTLIB::EXP_F128, "expf128");
+ setLibcallName(RTLIB::EXP2_F128, "exp2f128");
+ setLibcallName(RTLIB::SIN_F128, "sinf128");
+ setLibcallName(RTLIB::COS_F128, "cosf128");
+ setLibcallName(RTLIB::POW_F128, "powf128");
+ setLibcallName(RTLIB::FMIN_F128, "fminf128");
+ setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
+ setLibcallName(RTLIB::POWI_F128, "__powikf2");
+ setLibcallName(RTLIB::REM_F128, "fmodf128");
+ }
+
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
@@ -1036,6 +1132,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
default: break;
case PPC::DIR_970:
case PPC::DIR_A2:
+ case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR4:
@@ -1126,10 +1223,28 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
+unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (Subtarget.hasSPE() && VT == MVT::f64)
+ return 2;
+ return PPCTargetLowering::getNumRegisters(Context, VT);
+}
+
+MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (Subtarget.hasSPE() && VT == MVT::f64)
+ return MVT::i32;
+ return PPCTargetLowering::getRegisterType(Context, VT);
+}
+
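A note on the two overrides above: under SPE an f64 value is reported to the calling-convention code as two i32 registers, so a double argument travels as a pair of 32-bit words. A hedged C++ sketch of that split (helper name invented; which half lands in the lower-numbered register is an ABI detail not decided here):

    #include <cstdint>
    #include <cstring>
    #include <utility>

    // Split a double into the two 32-bit words an SPE target would pass in
    // a pair of GPR-sized registers.
    static std::pair<uint32_t, uint32_t> splitF64(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));
      return { (uint32_t)(bits >> 32), (uint32_t)bits };
    }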
bool PPCTargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
+bool PPCTargetLowering::hasSPE() const {
+ return Subtarget.hasSPE();
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -1142,6 +1257,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
+ case PPCISD::FP_TO_UINT_IN_VSR:
+ return "PPCISD::FP_TO_UINT_IN_VSR,";
+ case PPCISD::FP_TO_SINT_IN_VSR:
+ return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::STFIWX: return "PPCISD::STFIWX";
@@ -1195,6 +1314,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
+ case PPCISD::ST_VSR_SCAL_INT:
+ return "PPCISD::ST_VSR_SCAL_INT";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -1231,6 +1352,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
case PPCISD::QBFLT: return "PPCISD::QBFLT";
case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
+ case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
}
return nullptr;
}
@@ -1461,7 +1583,7 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/**
- * \brief Common function used to match vmrgew and vmrgow shuffles
+ * Common function used to match vmrgew and vmrgow shuffles
*
* The indexOffset determines whether to look for even or odd words in
* the shuffle mask. This is based on the endianness of the target
@@ -1518,7 +1640,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
}
/**
- * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
+ * Determine if the specified shuffle mask is suitable for the vmrgew or
* vmrgow instructions.
*
* \param[in] N The shuffle vector SD Node to analyze
@@ -2550,10 +2672,11 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual BlockAddress is stored in the TOC.
- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
- setUsesTOCBasePtr(DAG);
+ if (Subtarget.isSVR4ABI() && isPositionIndependent()) {
+ if (Subtarget.isPPC64())
+ setUsesTOCBasePtr(DAG);
SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
- return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
+ return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
}
unsigned MOHiFlag, MOLoFlag;
@@ -2571,7 +2694,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
// large models could be added if users need it, at the cost of
// additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- if (DAG.getTarget().Options.EmulatedTLS)
+ if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
SDLoc dl(GA);
@@ -3116,7 +3239,7 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
- ArgVT == MVT::v1i128)
+ ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
Align = 16;
// QPX vector types stored in double-precision are padded to a 32 byte
// boundary.
@@ -3196,7 +3319,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
- ArgVT == MVT::v1i128)
+ ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
if (AvailableVRs > 0) {
--AvailableVRs;
return false;
@@ -3285,7 +3408,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// Reserve space for the linkage area on the stack.
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, PtrByteSize);
- if (useSoftFloat())
+ if (useSoftFloat() || hasSPE())
CCInfo.PreAnalyzeFormalArguments(Ins);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -3309,12 +3432,16 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::f32:
if (Subtarget.hasP8Vector())
RC = &PPC::VSSRCRegClass;
+ else if (Subtarget.hasSPE())
+ RC = &PPC::SPE4RCRegClass;
else
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
+ else if (Subtarget.hasSPE())
+ RC = &PPC::SPERCRegClass;
else
RC = &PPC::F8RCRegClass;
break;
@@ -3403,7 +3530,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
};
unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
- if (useSoftFloat())
+ if (useSoftFloat() || hasSPE())
NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
@@ -3785,23 +3912,23 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
+ case MVT::f128:
if (!Subtarget.hasQPX()) {
- // These can be scalar arguments or elements of a vector array type
- // passed directly. The latter are used to implement ELFv2 homogenous
- // vector aggregates.
- if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- ++VR_idx;
- } else {
- if (CallConv == CallingConv::Fast)
- ComputeArgOffset();
-
- needsLoad = true;
- }
- if (CallConv != CallingConv::Fast || needsLoad)
- ArgOffset += 16;
- break;
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++VR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += 16;
+ break;
} // not QPX
assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
@@ -4263,7 +4390,7 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
unsigned CallerMinReservedArea = FI->getMinReservedArea();
int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
- // Remember only if the new adjustement is bigger.
+ // Remember only if the new adjustment is bigger.
if (SPDiff < FI->getTailCallSPDelta())
FI->setTailCallSPDelta(SPDiff);
@@ -4939,7 +5066,11 @@ SDValue PPCTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ CCRetInfo.AnalyzeCallResult(
+ Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
+ ? RetCC_PPC_Cold
+ : RetCC_PPC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
@@ -5108,15 +5239,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(isa<GlobalAddressSDNode>(Callee) &&
"Callee should be an llvm::Function object.");
- DEBUG(
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- const unsigned Width = 80 - strlen("TCO caller: ")
- - strlen(", callee linkage: 0, 0");
- dbgs() << "TCO caller: "
- << left_justify(DAG.getMachineFunction().getName(), Width)
- << ", callee linkage: "
- << GV->getVisibility() << ", " << GV->getLinkage() << "\n"
- );
+ LLVM_DEBUG(
+ const GlobalValue *GV =
+ cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ const unsigned Width =
+ 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
+ dbgs() << "TCO caller: "
+ << left_justify(DAG.getMachineFunction().getName(), Width)
+ << ", callee linkage: " << GV->getVisibility() << ", "
+ << GV->getLinkage() << "\n");
}
}
@@ -5159,6 +5290,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// of the 32-bit SVR4 ABI stack frame layout.
assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Cold ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
unsigned PtrByteSize = 4;
@@ -5462,6 +5594,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
+ // Avoid allocating parameter area for fastcc functions if all the arguments
+ // can be passed in the registers.
+ if (CallConv == CallingConv::Fast)
+ HasParameterArea = false;
+
// Add up all the space actually used.
for (unsigned i = 0; i != NumOps; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -5472,9 +5609,11 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
continue;
if (CallConv == CallingConv::Fast) {
- if (Flags.isByVal())
+ if (Flags.isByVal()) {
NumGPRsUsed += (Flags.getByValSize()+7)/8;
- else
+ if (NumGPRsUsed > NumGPRs)
+ HasParameterArea = true;
+ } else {
switch (ArgVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i1:
@@ -5489,6 +5628,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
+ case MVT::f128:
if (++NumVRsUsed <= NumVRs)
continue;
break;
@@ -5511,6 +5651,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
continue;
break;
}
+ HasParameterArea = true;
+ }
}
/* Respect alignment of argument on the stack. */
@@ -5867,6 +6009,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v2f64:
case MVT::v2i64:
case MVT::v1i128:
+ case MVT::f128:
if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
@@ -6420,7 +6563,10 @@ PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
- return CCInfo.CheckReturn(Outs, RetCC_PPC);
+ return CCInfo.CheckReturn(
+ Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
+ ? RetCC_PPC_Cold
+ : RetCC_PPC);
}
SDValue
@@ -6432,7 +6578,10 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+ CCInfo.AnalyzeReturn(Outs,
+ (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
+ ? RetCC_PPC_Cold
+ : RetCC_PPC);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -6852,7 +7001,7 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
-/// \brief Custom lowers floating point to integer conversions to use
+/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
@@ -6889,6 +7038,51 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const {
+
+ // FP to INT conversions are legal for f128.
+ if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
+ return Op;
+
+ // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+ // PPC (the libcall is not available).
+ if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
+ if (Op.getValueType() == MVT::i32) {
+ if (Op.getOpcode() == ISD::FP_TO_SINT) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, Op.getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, Op.getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+
+ // Now use a smaller FP_TO_SINT.
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
+ if (Op.getOpcode() == ISD::FP_TO_UINT) {
+ const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
+ SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: generated code sucks.
+ // TODO: Are there fast-math-flags to propagate to this FSUB?
+ SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
+ Op.getOperand(0), Tmp);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
+ True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
+ DAG.getConstant(0x80000000, dl, MVT::i32));
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+ Op.getOperand(0));
+ return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
+ ISD::SETGE);
+ }
+ }
+
+ return SDValue();
+ }
+
if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
return LowerFP_TO_INTDirectMove(Op, DAG, dl);
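The FP_TO_UINT branch above builds the select X >= 2^31 ? (int)(X - 2^31) + 0x80000000 : (int)X. A scalar C++ rendering of the same idea, using double in place of ppcf128 (illustrative only; it ignores the round-to-zero detail the DAG code handles with FADDRTZ):

    #include <cstdint>
    #include <cstdio>

    // Values >= 2^31 are biased down into signed range, converted, then the
    // bias is added back as 0x80000000.
    static uint32_t fpToUint32(double x) {
      const double twoE31 = 2147483648.0;   // bit pattern 0x41e0000000000000
      if (x >= twoE31)
        return (uint32_t)(int32_t)(x - twoE31) + 0x80000000u;
      return (uint32_t)(int32_t)x;
    }

    int main() {
      printf("%u %u\n", fpToUint32(3.0e9), fpToUint32(123.0)); // 3000000000 123
    }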
@@ -6970,7 +7164,7 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
-/// \brief Analyze profitability of direct move
+/// Analyze profitability of direct move
/// prefer float load to int load plus direct move
/// when there is no integer use of int load
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
@@ -7000,7 +7194,7 @@ bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
return false;
}
-/// \brief Custom lowers integer to floating point conversions to use
+/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
@@ -7036,6 +7230,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ // Conversions to f128 are legal.
+ if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
+ return Op;
+
if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
return SDValue();
@@ -7552,6 +7750,23 @@ static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
return !(IsSplat && IsLoad);
}
+// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
+SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+
+ SDLoc dl(Op);
+ SDValue Op0 = Op->getOperand(0);
+
+ if (!EnableQuadPrecision ||
+ (Op.getValueType() != MVT::f128 ) ||
+ (Op0.getOpcode() != ISD::BUILD_PAIR) ||
+ (Op0.getOperand(0).getValueType() != MVT::i64) ||
+ (Op0.getOperand(1).getValueType() != MVT::i64))
+ return SDValue();
+
+ return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
+ Op0.getOperand(1));
+}
+
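LowerBITCAST above only matches bitcast f128 (build_pair i64, i64) and folds it to BUILD_FP128, i.e. gluing two 64-bit halves into one 128-bit value. A hedged sketch of that glue step (which operand is the high half is a property of the node, assumed here only for illustration):

    #include <cstdint>

    // Combine two 64-bit halves into one 128-bit quantity, as BUILD_FP128
    // does for the f128 result.
    static unsigned __int128 buildFP128Bits(uint64_t hi, uint64_t lo) {
      return ((unsigned __int128)hi << 64) | lo;
    }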
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9302,27 +9517,19 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG);
-
- case ISD::VAARG:
- return LowerVAARG(Op, DAG);
-
- case ISD::VACOPY:
- return LowerVACOPY(Op, DAG);
- case ISD::STACKRESTORE:
- return LowerSTACKRESTORE(Op, DAG);
-
- case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ // Variable argument lowering.
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::VAARG: return LowerVAARG(Op, DAG);
+ case ISD::VACOPY: return LowerVACOPY(Op, DAG);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GET_DYNAMIC_AREA_OFFSET:
return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
- case ISD::EH_DWARF_CFA:
- return LowerEH_DWARF_CFA(Op, DAG);
-
+ // Exception handling lowering.
+ case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -9331,8 +9538,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
- case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
- SDLoc(Op));
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -9355,6 +9561,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
// For counter-based loop handling.
case ISD::INTRINSIC_W_CHAIN: return SDValue();
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -9400,7 +9608,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
N->getOperand(1));
- Results.push_back(NewInt);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
Results.push_back(NewInt.getValue(1));
break;
}
@@ -9418,25 +9626,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
- case ISD::FP_ROUND_INREG: {
- assert(N->getValueType(0) == MVT::ppcf128);
- assert(N->getOperand(0).getValueType() == MVT::ppcf128);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(0, dl));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- MVT::f64, N->getOperand(0),
- DAG.getIntPtrConstant(1, dl));
-
- // Add the two halves of the long double in round-to-zero mode.
- SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
-
- // We know the low half is about to be thrown away, so just use something
- // convenient.
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
- FPreg, FPreg));
- return;
- }
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
@@ -10083,6 +10272,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_I8 ||
MI.getOpcode() == PPC::SELECT_CC_F4 ||
MI.getOpcode() == PPC::SELECT_CC_F8 ||
+ MI.getOpcode() == PPC::SELECT_CC_F16 ||
MI.getOpcode() == PPC::SELECT_CC_QFRC ||
MI.getOpcode() == PPC::SELECT_CC_QSRC ||
MI.getOpcode() == PPC::SELECT_CC_QBRC ||
@@ -10090,13 +10280,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
MI.getOpcode() == PPC::SELECT_CC_VSRC ||
+ MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
+ MI.getOpcode() == PPC::SELECT_CC_SPE ||
MI.getOpcode() == PPC::SELECT_I4 ||
MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 ||
MI.getOpcode() == PPC::SELECT_F8 ||
+ MI.getOpcode() == PPC::SELECT_F16 ||
MI.getOpcode() == PPC::SELECT_QFRC ||
MI.getOpcode() == PPC::SELECT_QSRC ||
MI.getOpcode() == PPC::SELECT_QBRC ||
+ MI.getOpcode() == PPC::SELECT_SPE ||
+ MI.getOpcode() == PPC::SELECT_SPE4 ||
MI.getOpcode() == PPC::SELECT_VRRC ||
MI.getOpcode() == PPC::SELECT_VSFRC ||
MI.getOpcode() == PPC::SELECT_VSSRC ||
@@ -10129,6 +10324,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
+ MI.getOpcode() == PPC::SELECT_F16 ||
+ MI.getOpcode() == PPC::SELECT_SPE4 ||
+ MI.getOpcode() == PPC::SELECT_SPE ||
MI.getOpcode() == PPC::SELECT_QFRC ||
MI.getOpcode() == PPC::SELECT_QSRC ||
MI.getOpcode() == PPC::SELECT_QBRC ||
@@ -10681,6 +10879,7 @@ unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
return 3;
case PPC::DIR_440:
case PPC::DIR_A2:
+ case PPC::DIR_E500:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
return 2;
@@ -10962,7 +11161,7 @@ SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
// Size of integers being compared has a critical role in the following
// analysis, so we prefer to do this when all types are legal.
- if (!DCI.isAfterLegalizeVectorOps())
+ if (!DCI.isAfterLegalizeDAG())
return SDValue();
// If all users of SETCC extend its value to a legal integer type
@@ -11560,7 +11759,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
-/// \brief Reduces the number of fp-to-int conversion when building a vector.
+/// Reduces the number of fp-to-int conversion when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
@@ -11640,7 +11839,7 @@ combineElementTruncationToVectorTruncation(SDNode *N,
return SDValue();
}
-/// \brief Reduce the number of loads when building a vector.
+/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
@@ -11948,10 +12147,12 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
SDLoc dl(N);
SDValue Op(N, 0);
- // Don't handle ppc_fp128 here or i1 conversions.
+ // Don't handle ppc_fp128 here or conversions that are out-of-range capable
+ // from the hardware.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
- if (Op.getOperand(0).getValueType() == MVT::i1)
+ if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
+ Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
return SDValue();
SDValue FirstOperand(Op.getOperand(0));
@@ -12171,6 +12372,64 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
return Store;
}
+// Handle DAG combine for STORE (FP_TO_INT F).
+SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ unsigned Opcode = N->getOperand(1).getOpcode();
+
+ assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
+ && "Not a FP_TO_INT Instruction!");
+
+ SDValue Val = N->getOperand(1).getOperand(0);
+ EVT Op1VT = N->getOperand(1).getValueType();
+ EVT ResVT = Val.getValueType();
+
+ // Floating point types smaller than 32 bits are not legal on Power.
+ if (ResVT.getScalarSizeInBits() < 32)
+ return SDValue();
+
+ // Only perform combine for conversion to i64/i32 or power9 i16/i8.
+ bool ValidTypeForStoreFltAsInt =
+ (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
+ (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
+
+ if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
+ cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
+ return SDValue();
+
+ // Extend f32 values to f64
+ if (ResVT.getScalarSizeInBits() == 32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ // Set signed or unsigned conversion opcode.
+ unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
+ PPCISD::FP_TO_SINT_IN_VSR :
+ PPCISD::FP_TO_UINT_IN_VSR;
+
+ Val = DAG.getNode(ConvOpcode,
+ dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ // Set number of bytes being converted.
+ unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
+ SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
+ DAG.getIntPtrConstant(ByteSize, dl, false),
+ DAG.getValueType(Op1VT) };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+}
+
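At the source level, the shape combineStoreFPToInt rewrites is a store of a float-to-int conversion; with P8 Altivec/VSX the converted value can stay in a VSX register and be stored directly instead of bouncing through a GPR. A small C++ illustration of the matched pattern (the function is hypothetical, not from the patch):

    #include <cstdint>

    // STORE (FP_TO_SINT f): i32/i64 destinations qualify with P8 Altivec,
    // i16/i8 additionally need the P9 vector support checked above.
    void storeTruncatedToInt(double d, int32_t *p) {
      *p = (int32_t)d;   // fp_to_sint feeding a store
    }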
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12210,60 +12469,22 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
+
EVT Op1VT = N->getOperand(1).getValueType();
- bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
- (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
-
- // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
- if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
- N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
- ValidTypeForStoreFltAsInt &&
- N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
- SDValue Val = N->getOperand(1).getOperand(0);
- if (Val.getValueType() == MVT::f32) {
- Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- }
- Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
-
- if (Op1VT == MVT::i32) {
- SDValue Ops[] = {
- N->getOperand(0), Val, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
- };
-
- Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops,
- cast<StoreSDNode>(N)->getMemoryVT(),
- cast<StoreSDNode>(N)->getMemOperand());
- } else {
- unsigned WidthInBytes =
- N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
- SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
-
- SDValue Ops[] = {
- N->getOperand(0), Val, N->getOperand(2), WidthConst,
- DAG.getValueType(N->getOperand(1).getValueType())
- };
- Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
- DAG.getVTList(MVT::Other), Ops,
- cast<StoreSDNode>(N)->getMemoryVT(),
- cast<StoreSDNode>(N)->getMemOperand());
- }
+ unsigned Opcode = N->getOperand(1).getOpcode();
- DCI.AddToWorklist(Val.getNode());
- return Val;
+ if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
+ SDValue Val = combineStoreFPToInt(N, DCI);
+ if (Val)
+ return Val;
}
// Turn STORE (BSWAP) -> sthbrx/stwbrx.
- if (cast<StoreSDNode>(N)->isUnindexed() &&
- N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
- (N->getOperand(1).getValueType() == MVT::i32 ||
- N->getOperand(1).getValueType() == MVT::i16 ||
- (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
- N->getOperand(1).getValueType() == MVT::i64))) {
+ (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
+ (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
+
// STBRX can only handle simple types.
EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
if (mVT.isExtended())
@@ -12296,9 +12517,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
// So it can increase the chance of CSE constant construction.
- EVT VT = N->getOperand(1).getValueType();
if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
- isa<ConstantSDNode>(N->getOperand(1)) && VT == MVT::i32) {
+ isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
// Need to sign-extended to 64-bits to handle negative values.
EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
@@ -12316,8 +12536,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// For little endian, VSX stores require generating xxswapd/lxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
- if (VT.isSimple()) {
- MVT StoreVT = VT.getSimpleVT();
+ if (Op1VT.isSimple()) {
+ MVT StoreVT = Op1VT.getSimpleVT();
if (Subtarget.needsSwapsForVSXMemOps() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
@@ -13100,14 +13320,21 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// really care overly much here so just give them all the same reg classes.
case 'd':
case 'f':
- if (VT == MVT::f32 || VT == MVT::i32)
- return std::make_pair(0U, &PPC::F4RCRegClass);
- if (VT == MVT::f64 || VT == MVT::i64)
- return std::make_pair(0U, &PPC::F8RCRegClass);
- if (VT == MVT::v4f64 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QFRCRegClass);
- if (VT == MVT::v4f32 && Subtarget.hasQPX())
- return std::make_pair(0U, &PPC::QSRCRegClass);
+ if (Subtarget.hasSPE()) {
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::SPE4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::SPERCRegClass);
+ } else {
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
+ }
break;
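For the constraint handling above: the 'd'/'f' constraints keep resolving to the FPR classes, and with SPE they now resolve to the GPR-based SPE classes instead. A hedged example of inline asm that exercises the 'f' constraint on a classic FPU target (on an SPE target the same constraint would hand back SPE4RC/SPERC and the instruction would have to be an SPE one, e.g. efdadd):

    // "f" asks the backend for a floating-point register class, which is
    // exactly the lookup handled in the case above.
    static double addViaAsm(double a, double b) {
      double r;
      asm("fadd %0, %1, %2" : "=f"(r) : "f"(a), "f"(b));
      return r;
    }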
case 'v':
if (VT == MVT::v4f64 && Subtarget.hasQPX())
@@ -13590,7 +13817,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
}
-/// \brief Returns true if it is beneficial to convert a load of a constant
+/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
@@ -13639,6 +13866,9 @@ bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"invalid fpext types");
+ // Extending to float128 is not free.
+ if (DestVT == MVT::f128)
+ return false;
return true;
}
@@ -13695,6 +13925,8 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
case MVT::f32:
case MVT::f64:
return true;
+ case MVT::f128:
+ return (EnableQuadPrecision && Subtarget.hasP9Vector());
default:
break;
}
@@ -13923,3 +14155,20 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// If the function is local then we have a good chance at tail-calling it
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
+
+bool PPCTargetLowering::
+isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
+ const Value *Mask = AndI.getOperand(1);
+ // If the mask is suitable for andi. or andis. we should sink the and.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
+ // Can't handle constants wider than 64-bits.
+ if (CI->getBitWidth() > 64)
+ return false;
+ int64_t ConstVal = CI->getZExtValue();
+ return isUInt<16>(ConstVal) ||
+ (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
+ }
+
+ // For non-constant masks, we can always use the record-form and.
+ return true;
+}
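A worked check of the mask predicate above: it accepts any constant whose set bits fit entirely in the low 16 bits (andi.) or entirely in bits 16-31 with the low 16 clear (andis.). A standalone C++ mirror of the test (helper name invented here):

    #include <cstdint>
    #include <cstdio>

    // Same condition as isMaskAndCmp0FoldingBeneficial's constant case.
    static bool fitsAndiOrAndis(uint64_t m) {
      return m <= 0xFFFFu || ((m >> 16) <= 0xFFFFu && (m & 0xFFFF) == 0);
    }

    int main() {
      printf("%d %d %d\n",
             fitsAndiOrAndis(0x00FF),       // 1: andi.
             fitsAndiOrAndis(0x00FF0000),   // 1: andis.
             fitsAndiOrAndis(0x00FF00FF));  // 0: needs two instructions
    }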