diff options
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | lib/Target/PowerPC/PPCISelLowering.cpp | 427 |
1 files changed, 298 insertions, 129 deletions
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 2b9195b095e17..f7663d8e5185c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,39 +11,88 @@ // //===----------------------------------------------------------------------===// -#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" +#include "PPCRegisterInfo.h" +#include "PPCSubtarget.h" #include "PPCTargetMachine.h" -#include "PPCTargetObjectFile.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> #include <list> +#include <utility> +#include <vector> using namespace llvm; @@ -1525,7 +1574,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { - // Check that the mask is shuffling words for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); @@ -1643,7 +1691,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // If the element isn't a constant, bail fully out. if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); - if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) @@ -2026,7 +2073,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { - // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for @@ -2277,7 +2323,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of @@ -2602,10 +2647,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__trampoline_setup", PtrVT), - std::move(Args)); + CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; @@ -2737,7 +2781,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -bool +bool llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -2752,7 +2796,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT, unsigned RegNum = State.getFirstUnallocated(ArgRegs); int RegsLeft = NumArgRegs - RegNum; - // Skip if there is not enough registers left for long double type (4 gpr regs + // Skip if there is not enough registers left for long double type (4 gpr regs // in soft float mode) and put long double argument on the stack. if (RegNum != NumArgRegs && RegsLeft < 4) { for (int i = 0; i < RegsLeft; i++) { @@ -4066,7 +4110,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget, static bool hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) { - if (CS->arg_size() != CallerFn->getArgumentList().size()) + if (CS->arg_size() != CallerFn->arg_size()) return false; ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin(); @@ -4222,11 +4266,12 @@ namespace { struct TailCallArgumentInfo { SDValue Arg; SDValue FrameIdxOp; - int FrameIdx; + int FrameIdx = 0; - TailCallArgumentInfo() : FrameIdx(0) {} + TailCallArgumentInfo() = default; }; -} + +} // end anonymous namespace /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void StoreTailCallArgumentsToStackSlot( @@ -4406,7 +4451,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { - bool isPPC64 = Subtarget.isPPC64(); bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); @@ -4602,7 +4646,6 @@ SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); @@ -4649,7 +4692,6 @@ SDValue PPCTargetLowering::FinishCall( SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, @@ -5059,7 +5101,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); @@ -5105,10 +5146,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( }; const unsigned NumGPRs = array_lengthof(GPR); - const unsigned NumFPRs = 13; + const unsigned NumFPRs = useSoftFloat() ? 0 : 13; const unsigned NumVRs = array_lengthof(VR); const unsigned NumQFPRs = NumFPRs; + // On ELFv2, we can avoid allocating the parameter area if all the arguments + // can be passed to the callee in registers. + // For the fast calling convention, there is another check below. + // Note: We should keep consistent with LowerFormalArguments_64SVR4() + bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast; + if (!HasParameterArea) { + unsigned ParamAreaSize = NumGPRs * PtrByteSize; + unsigned AvailableFPRs = NumFPRs; + unsigned AvailableVRs = NumVRs; + unsigned NumBytesTmp = NumBytes; + for (unsigned i = 0; i != NumOps; ++i) { + if (Outs[i].Flags.isNest()) continue; + if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, + PtrByteSize, LinkageSize, ParamAreaSize, + NumBytesTmp, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) + HasParameterArea = true; + } + } + // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; @@ -5176,13 +5237,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( unsigned NumBytesActuallyUsed = NumBytes; - // The prolog code of the callee may store up to 8 GPR argument registers to + // In the old ELFv1 ABI, + // the prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area. - NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + // In the ELFv2 ABI, we allocate the parameter area iff a callee + // really requires memory operands, e.g. a vararg function. + if (HasParameterArea) + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + else + NumBytes = LinkageSize; // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && @@ -5401,6 +5467,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); @@ -5486,6 +5554,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, TailCallArguments, dl); @@ -5520,6 +5590,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( // GPRs when within range. For now, we always put the value in both // locations (or even all three). if (isVarArg) { + assert(HasParameterArea && + "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = @@ -5552,6 +5624,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); @@ -5572,6 +5646,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( case MVT::v4i1: { bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; if (isVarArg) { + assert(HasParameterArea && + "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = @@ -5604,6 +5680,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (CallConv == CallingConv::Fast) ComputePtrOff(); + assert(HasParameterArea && + "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); @@ -5618,7 +5696,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( } } - assert(NumBytesActuallyUsed == ArgOffset); + assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && + "mismatch in size of parameter area"); (void)NumBytesActuallyUsed; if (!MemOpChains.empty()) @@ -5673,7 +5752,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const { - unsigned NumOps = Outs.size(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -6065,7 +6143,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); @@ -7612,7 +7689,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); } - } if (Subtarget.hasQPX()) { @@ -7792,24 +7868,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = - cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); + cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; isDot = false; switch (IntrinsicID) { - default: return false; - // Comparison predicates. - case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; + default: + return false; + // Comparison predicates. + case Intrinsic::ppc_altivec_vcmpbfp_p: + CompareOpc = 966; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpeqfp_p: + CompareOpc = 198; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequb_p: + CompareOpc = 6; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequh_p: + CompareOpc = 70; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpequw_p: + CompareOpc = 134; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; - isDot = 1; + isDot = true; } else return false; - break; case Intrinsic::ppc_altivec_vcmpneb_p: case Intrinsic::ppc_altivec_vcmpneh_p: @@ -7818,45 +7909,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpnezh_p: case Intrinsic::ppc_altivec_vcmpnezw_p: if (Subtarget.hasP9Altivec()) { - switch(IntrinsicID) { - default: llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; - case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; - case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; - case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; - case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; - case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; + switch (IntrinsicID) { + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpneb_p: + CompareOpc = 7; + break; + case Intrinsic::ppc_altivec_vcmpneh_p: + CompareOpc = 71; + break; + case Intrinsic::ppc_altivec_vcmpnew_p: + CompareOpc = 135; + break; + case Intrinsic::ppc_altivec_vcmpnezb_p: + CompareOpc = 263; + break; + case Intrinsic::ppc_altivec_vcmpnezh_p: + CompareOpc = 327; + break; + case Intrinsic::ppc_altivec_vcmpnezw_p: + CompareOpc = 391; + break; } - isDot = 1; + isDot = true; } else return false; - break; - case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgefp_p: + CompareOpc = 454; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtfp_p: + CompareOpc = 710; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsb_p: + CompareOpc = 774; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsh_p: + CompareOpc = 838; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtsw_p: + CompareOpc = 902; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; - isDot = 1; + isDot = true; } else return false; - break; - case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; + case Intrinsic::ppc_altivec_vcmpgtub_p: + CompareOpc = 518; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtuh_p: + CompareOpc = 582; + isDot = true; + break; + case Intrinsic::ppc_altivec_vcmpgtuw_p: + CompareOpc = 646; + isDot = true; + break; case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 711; - isDot = 1; + isDot = true; } else return false; - break; - // VSX predicate comparisons use the same infrastructure + + // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: case Intrinsic::ppc_vsx_xvcmpgtdp_p: @@ -7865,33 +7991,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_vsx_xvcmpgtsp_p: if (Subtarget.hasVSX()) { switch (IntrinsicID) { - case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; - case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; - case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; - case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; - case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; - case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; + case Intrinsic::ppc_vsx_xvcmpeqdp_p: + CompareOpc = 99; + break; + case Intrinsic::ppc_vsx_xvcmpgedp_p: + CompareOpc = 115; + break; + case Intrinsic::ppc_vsx_xvcmpgtdp_p: + CompareOpc = 107; + break; + case Intrinsic::ppc_vsx_xvcmpeqsp_p: + CompareOpc = 67; + break; + case Intrinsic::ppc_vsx_xvcmpgesp_p: + CompareOpc = 83; + break; + case Intrinsic::ppc_vsx_xvcmpgtsp_p: + CompareOpc = 75; + break; } - isDot = 1; - } - else + isDot = true; + } else return false; - break; - // Normal Comparisons. - case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; + // Normal Comparisons. + case Intrinsic::ppc_altivec_vcmpbfp: + CompareOpc = 966; + break; + case Intrinsic::ppc_altivec_vcmpeqfp: + CompareOpc = 198; + break; + case Intrinsic::ppc_altivec_vcmpequb: + CompareOpc = 6; + break; + case Intrinsic::ppc_altivec_vcmpequh: + CompareOpc = 70; + break; + case Intrinsic::ppc_altivec_vcmpequw: + CompareOpc = 134; + break; case Intrinsic::ppc_altivec_vcmpequd: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 199; - isDot = 0; - } else + else return false; - break; case Intrinsic::ppc_altivec_vcmpneb: case Intrinsic::ppc_altivec_vcmpneh: @@ -7899,43 +8043,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpnezb: case Intrinsic::ppc_altivec_vcmpnezh: case Intrinsic::ppc_altivec_vcmpnezw: - if (Subtarget.hasP9Altivec()) { + if (Subtarget.hasP9Altivec()) switch (IntrinsicID) { - default: llvm_unreachable("Unknown comparison intrinsic."); - case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; - case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; - case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; - case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; - case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; - case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; + default: + llvm_unreachable("Unknown comparison intrinsic."); + case Intrinsic::ppc_altivec_vcmpneb: + CompareOpc = 7; + break; + case Intrinsic::ppc_altivec_vcmpneh: + CompareOpc = 71; + break; + case Intrinsic::ppc_altivec_vcmpnew: + CompareOpc = 135; + break; + case Intrinsic::ppc_altivec_vcmpnezb: + CompareOpc = 263; + break; + case Intrinsic::ppc_altivec_vcmpnezh: + CompareOpc = 327; + break; + case Intrinsic::ppc_altivec_vcmpnezw: + CompareOpc = 391; + break; } - isDot = 0; - } else + else return false; break; - case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgefp: + CompareOpc = 454; + break; + case Intrinsic::ppc_altivec_vcmpgtfp: + CompareOpc = 710; + break; + case Intrinsic::ppc_altivec_vcmpgtsb: + CompareOpc = 774; + break; + case Intrinsic::ppc_altivec_vcmpgtsh: + CompareOpc = 838; + break; + case Intrinsic::ppc_altivec_vcmpgtsw: + CompareOpc = 902; + break; case Intrinsic::ppc_altivec_vcmpgtsd: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 967; - isDot = 0; - } else + else return false; - break; - case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; + case Intrinsic::ppc_altivec_vcmpgtub: + CompareOpc = 518; + break; + case Intrinsic::ppc_altivec_vcmpgtuh: + CompareOpc = 582; + break; + case Intrinsic::ppc_altivec_vcmpgtuw: + CompareOpc = 646; + break; case Intrinsic::ppc_altivec_vcmpgtud: - if (Subtarget.hasP8Altivec()) { + if (Subtarget.hasP8Altivec()) CompareOpc = 711; - isDot = 0; - } else + else return false; - break; } return true; @@ -8044,7 +8212,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -9174,10 +9342,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineFunction *F = BB->getParent(); - if (Subtarget.hasISEL() && - (MI.getOpcode() == PPC::SELECT_CC_I4 || + if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || - MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) { + MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) { SmallVector<MachineOperand, 2> Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) @@ -9417,7 +9584,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); - else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && @@ -10028,14 +10194,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { return false; } - /// This function is called when we have proved that a SETCC node can be replaced /// by subtraction (and other supporting instructions) so that the result of /// comparison is kept in a GPR instead of CR. This function is purely for /// codegen purposes and has some flags to guide the codegen process. static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); // Zero extend the operands to the largest legal integer. Originally, they @@ -10068,7 +10232,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const { - assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); SelectionDAG &DAG = DCI.DAG; @@ -11227,9 +11390,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); + // If the type of BSWAP operand is wider than stored memory width + // it need to be shifted to the right side before STBRX. + EVT mVT = cast<StoreSDNode>(N)->getMemoryVT(); + if (Op1VT.bitsGT(mVT)) { + int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); + BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, + DAG.getConstant(Shift, dl, MVT::i32)); + // Need to truncate if this is a bswap of i64 stored as i32/i16. + if (Op1VT == MVT::i64) + BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); + } + SDValue Ops[] = { - N->getOperand(0), BSwapOp, N->getOperand(2), - DAG.getValueType(N->getOperand(1).getValueType()) + N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), @@ -11570,7 +11744,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } break; - case ISD::INTRINSIC_W_CHAIN: { + case ISD::INTRINSIC_W_CHAIN: // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (Subtarget.needsSwapsForVSXMemOps()) { @@ -11583,8 +11757,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } break; - } - case ISD::INTRINSIC_VOID: { + case ISD::INTRINSIC_VOID: // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Subtarget.needsSwapsForVSXMemOps()) { @@ -11597,7 +11770,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } break; - } case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && @@ -11635,9 +11807,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Return N so it doesn't get rechecked! return SDValue(N, 0); } - break; - case PPCISD::VCMP: { + case PPCISD::VCMP: // If a VCMPo node already exists with exactly the same operands as this // node, use its result instead of this node (VCMPo computes both a CR6 and // a normal output). @@ -11687,7 +11858,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(VCMPoNode, 0); } break; - } case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); @@ -11847,6 +12017,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); @@ -12295,7 +12466,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { - switch (Intrinsic) { case Intrinsic::ppc_qpx_qvlfd: case Intrinsic::ppc_qpx_qvlfs: @@ -12753,7 +12923,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const { } bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - if (!VT.isSimple() || !Subtarget.hasVSX()) return false; |