summaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp427
1 files changed, 298 insertions, 129 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2b9195b095e17..f7663d8e5185c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,39 +11,88 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCCCState.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "PPCTargetObjectFile.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
#include <list>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -1525,7 +1574,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-
// Check that the mask is shuffling words
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
@@ -1643,7 +1691,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
-
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
@@ -2026,7 +2073,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
}
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
-
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
@@ -2277,7 +2323,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
-
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
@@ -2602,10 +2647,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2737,7 +2781,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-bool
+bool
llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2752,7 +2796,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
int RegsLeft = NumArgRegs - RegNum;
- // Skip if there is not enough registers left for long double type (4 gpr regs
+ // Skip if there is not enough registers left for long double type (4 gpr regs
// in soft float mode) and put long double argument on the stack.
if (RegNum != NumArgRegs && RegsLeft < 4) {
for (int i = 0; i < RegsLeft; i++) {
@@ -4066,7 +4110,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
- if (CS->arg_size() != CallerFn->getArgumentList().size())
+ if (CS->arg_size() != CallerFn->arg_size())
return false;
ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
@@ -4222,11 +4266,12 @@ namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
- int FrameIdx;
+ int FrameIdx = 0;
- TailCallArgumentInfo() : FrameIdx(0) {}
+ TailCallArgumentInfo() = default;
};
-}
+
+} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
@@ -4406,7 +4451,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
-
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
@@ -4602,7 +4646,6 @@ SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -4649,7 +4692,6 @@ SDValue PPCTargetLowering::FinishCall(
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
-
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
@@ -5059,7 +5101,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -5105,10 +5146,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
};
const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
+ const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
const unsigned NumQFPRs = NumFPRs;
+ // On ELFv2, we can avoid allocating the parameter area if all the arguments
+ // can be passed to the callee in registers.
+ // For the fast calling convention, there is another check below.
+ // Note: We should keep consistent with LowerFormalArguments_64SVR4()
+ bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
+ if (!HasParameterArea) {
+ unsigned ParamAreaSize = NumGPRs * PtrByteSize;
+ unsigned AvailableFPRs = NumFPRs;
+ unsigned AvailableVRs = NumVRs;
+ unsigned NumBytesTmp = NumBytes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Outs[i].Flags.isNest()) continue;
+ if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ HasParameterArea = true;
+ }
+ }
+
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
@@ -5176,13 +5237,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned NumBytesActuallyUsed = NumBytes;
- // The prolog code of the callee may store up to 8 GPR argument registers to
+ // In the old ELFv1 ABI,
+ // the prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if its varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ // In the ELFv2 ABI, we allocate the parameter area iff a callee
+ // really requires memory operands, e.g. a vararg function.
+ if (HasParameterArea)
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ else
+ NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -5401,6 +5467,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5486,6 +5554,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5520,6 +5590,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5552,6 +5624,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5572,6 +5646,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5604,6 +5680,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5618,7 +5696,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
}
- assert(NumBytesActuallyUsed == ArgOffset);
+ assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
+ "mismatch in size of parameter area");
(void)NumBytesActuallyUsed;
if (!MemOpChains.empty())
@@ -5673,7 +5752,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -6065,7 +6143,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -7612,7 +7689,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
-
}
if (Subtarget.hasQPX()) {
@@ -7792,24 +7868,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
- default: return false;
- // Comparison predicates.
- case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ default:
+ return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ CompareOpc = 966;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ CompareOpc = 198;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ CompareOpc = 6;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ CompareOpc = 70;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ CompareOpc = 134;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
@@ -7818,45 +7909,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
- switch(IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ CompareOpc = 391;
+ break;
}
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ CompareOpc = 454;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ CompareOpc = 710;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ CompareOpc = 774;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ CompareOpc = 838;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ CompareOpc = 902;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ CompareOpc = 518;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ CompareOpc = 582;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ CompareOpc = 646;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- // VSX predicate comparisons use the same infrastructure
+
+ // VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
@@ -7865,33 +7991,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
if (Subtarget.hasVSX()) {
switch (IntrinsicID) {
- case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
- case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
- case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
- case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
- case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
- case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ CompareOpc = 99;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ CompareOpc = 115;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ CompareOpc = 107;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ CompareOpc = 67;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ CompareOpc = 83;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ CompareOpc = 75;
+ break;
}
- isDot = 1;
- }
- else
+ isDot = true;
+ } else
return false;
-
break;
- // Normal Comparisons.
- case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp:
+ CompareOpc = 966;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp:
+ CompareOpc = 198;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb:
+ CompareOpc = 6;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh:
+ CompareOpc = 70;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw:
+ CompareOpc = 134;
+ break;
case Intrinsic::ppc_altivec_vcmpequd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 199;
- isDot = 0;
- } else
+ else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
@@ -7899,43 +8043,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
- if (Subtarget.hasP9Altivec()) {
+ if (Subtarget.hasP9Altivec())
switch (IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ CompareOpc = 391;
+ break;
}
- isDot = 0;
- } else
+ else
return false;
break;
- case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp:
+ CompareOpc = 454;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp:
+ CompareOpc = 710;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb:
+ CompareOpc = 774;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh:
+ CompareOpc = 838;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw:
+ CompareOpc = 902;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 967;
- isDot = 0;
- } else
+ else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub:
+ CompareOpc = 518;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh:
+ CompareOpc = 582;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw:
+ CompareOpc = 646;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 711;
- isDot = 0;
- } else
+ else
return false;
-
break;
}
return true;
@@ -8044,7 +8212,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -9174,10 +9342,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
- if (Subtarget.hasISEL() &&
- (MI.getOpcode() == PPC::SELECT_CC_I4 ||
+ if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
+ MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -9417,7 +9584,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0);
-
else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
@@ -10028,14 +10194,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
-
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer. Originally, they
@@ -10068,7 +10232,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
@@ -11227,9 +11390,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+ // If the type of BSWAP operand is wider than stored memory width
+ // it need to be shifted to the right side before STBRX.
+ EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
+ if (Op1VT.bitsGT(mVT)) {
+ int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
+ BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
+ DAG.getConstant(Shift, dl, MVT::i32));
+ // Need to truncate if this is a bswap of i64 stored as i32/i16.
+ if (Op1VT == MVT::i64)
+ BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
+ }
+
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
@@ -11570,7 +11744,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
- case ISD::INTRINSIC_W_CHAIN: {
+ case ISD::INTRINSIC_W_CHAIN:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11583,8 +11757,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
- case ISD::INTRINSIC_VOID: {
+ case ISD::INTRINSIC_VOID:
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11597,7 +11770,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -11635,9 +11807,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
-
break;
- case PPCISD::VCMP: {
+ case PPCISD::VCMP:
// If a VCMPo node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMPo computes both a CR6 and
// a normal output).
@@ -11687,7 +11858,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(VCMPoNode, 0);
}
break;
- }
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
@@ -11847,6 +12017,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
@@ -12295,7 +12466,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
-
switch (Intrinsic) {
case Intrinsic::ppc_qpx_qvlfd:
case Intrinsic::ppc_qpx_qvlfs:
@@ -12753,7 +12923,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;