From 044eb2f6afba375a914ac9d8024f8f5142bb912e Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Mon, 18 Dec 2017 20:10:56 +0000
Subject: Vendor import of llvm trunk r321017:

https://llvm.org/svn/llvm-project/llvm/trunk@321017
---
 lib/Target/ARM/ARMISelLowering.cpp | 800 +++++++++++++++++++++++++------------
 1 file changed, 542 insertions(+), 258 deletions(-)

(limited to 'lib/Target/ARM/ARMISelLowering.cpp')

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 27dda93387b6f..1b4d7ff508489 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -24,6 +24,7 @@
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "Utils/ARMBaseInfo.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -56,6 +57,11 @@
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
@@ -93,7 +99,6 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include
@@ -221,19 +226,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   RegInfo = Subtarget->getRegisterInfo();
   Itins = Subtarget->getInstrItineraryData();
 
+  setBooleanContents(ZeroOrOneBooleanContent);
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
   if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
       !Subtarget->isTargetWatchOS()) {
-    const auto &E = Subtarget->getTargetTriple().getEnvironment();
-
-    bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
-                      E == Triple::MuslEABIHF;
-    // Windows is a special case. Technically, we will replace all of the "GNU"
-    // calls with calls to MSVCRT if appropriate and adjust the calling
-    // convention then.
-    IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
-
+    bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
     for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
       setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
                             IsHFTarget ? CallingConv::ARM_AAPCS_VFP
@@ -801,6 +799,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
     setOperationAction(ISD::USUBO, MVT::i32, Custom);
 
+    setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
+    setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+
     // i64 operation support.
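The hunk above replaces environment-triple sniffing with a single query of the float ABI when choosing the calling convention for runtime-library calls. A minimal standalone C++ sketch of that rule, with hypothetical enum and function names (the real code uses TM.Options.FloatABIType and iterates RTLIB libcall IDs):

#include <cassert>

enum class FloatABI { Soft, SoftFP, Hard };
enum class LibcallCC { ARM_AAPCS, ARM_AAPCS_VFP };

// Hard-float targets pass FP arguments to runtime-library calls in VFP
// registers (AAPCS-VFP); soft and softfp targets use the base AAPCS.
LibcallCC libcallConvFor(FloatABI ABI) {
  return ABI == FloatABI::Hard ? LibcallCC::ARM_AAPCS_VFP
                               : LibcallCC::ARM_AAPCS;
}

int main() {
  assert(libcallConvFor(FloatABI::Hard) == LibcallCC::ARM_AAPCS_VFP);
  assert(libcallConvFor(FloatABI::SoftFP) == LibcallCC::ARM_AAPCS);
}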
setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -1562,7 +1563,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const { switch (CC) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: case CallingConv::GHC: @@ -1611,7 +1612,7 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool isVarArg) const { switch (getEffectiveCallingConv(CC, isVarArg)) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); case CallingConv::ARM_AAPCS: @@ -1634,7 +1635,6 @@ SDValue ARMTargetLowering::LowerCallResult( const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const { - // Assign locations to each value returned by this call. SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, @@ -1732,7 +1732,6 @@ void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue &StackPtr, SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const { - SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); unsigned id = Subtarget->isLittle() ? 0 : 1; @@ -1774,7 +1773,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; - auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") @@ -1783,9 +1782,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG); - if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically @@ -1982,7 +1981,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isDirect = false; const TargetMachine &TM = getTargetMachine(); - const Module *Mod = MF.getFunction()->getParent(); + const Module *Mod = MF.getFunction().getParent(); const GlobalValue *GV = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) GV = G->getGlobal(); @@ -2032,9 +2031,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // more times in this block, we can improve codesize by calling indirectly // as BLXr has a 16-bit encoding. 
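The code-size reasoning in the comment above can be made concrete. A standalone sketch under assumed encodings (Thumb BL immediate = 4 bytes, BLX via register = 2 bytes) and an assumed 4-byte cost to materialize the callee address once; the "> 2" threshold just below falls out of this arithmetic:

#include <cassert>

unsigned directBytes(unsigned Calls)   { return 4 * Calls; }     // BL per call
unsigned indirectBytes(unsigned Calls) { return 4 + 2 * Calls; } // ldr + BLXr

bool preferIndirect(unsigned CallsInBlock) {
  return indirectBytes(CallsInBlock) < directBytes(CallsInBlock);
}

int main() {
  assert(!preferIndirect(2)); // 8 bytes either way: keep the direct calls
  assert(preferIndirect(3));  // 10 vs 12 bytes: indirect wins
}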
auto *GV = cast(Callee)->getGlobal(); - auto *BB = CLI.CS->getParent(); + auto *BB = CLI.CS.getParent(); bool PreferIndirect = - Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + Subtarget->isThumb() && MF.getFunction().optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa(U) && cast(U)->getParent() == BB; }) > 2; @@ -2106,7 +2105,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority - !MF.getFunction()->optForMinSize()) + !MF.getFunction().optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -2281,18 +2280,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); assert(Subtarget->supportsTailCall()); + // Tail calls to function pointers cannot be optimized for Thumb1 if the args + // to the call take up r0-r3. The reason is that there are no legal registers + // left to hold the pointer to the function to be called. + if (Subtarget->isThumb1Only() && Outs.size() >= 4 && + !isa(Callee.getNode())) + return false; + // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF->hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct @@ -2404,9 +2410,9 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); - StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". These offsets affect the return @@ -2440,7 +2446,6 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; @@ -2548,7 +2553,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. 
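The Thumb1 tail-call guard added above can be restated standalone: under AAPCS the first four GPR arguments occupy r0-r3, and an indirect tail call still needs a register for the target address, so a non-constant callee with four or more outgoing register arguments cannot be tail-called. A sketch with hypothetical names:

#include <cassert>

bool thumb1TailCallPossible(unsigned NumOutArgs, bool CalleeIsSymbol) {
  if (CalleeIsSymbol)
    return true;           // direct branch: no register needed for the target
  return NumOutArgs < 4;   // otherwise one of r0-r3 must stay free
}

int main() {
  assert(thumb1TailCallPossible(4, /*CalleeIsSymbol=*/true));
  assert(!thumb1TailCallPossible(4, /*CalleeIsSymbol=*/false));
  assert(thumb1TailCallPossible(3, /*CalleeIsSymbol=*/false));
}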
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); @@ -2686,7 +2691,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, auto T = const_cast(CP->getType()); auto C = const_cast(CP->getConstVal()); auto M = const_cast(DAG.getMachineFunction(). - getFunction()->getParent()); + getFunction().getParent()); auto GV = new GlobalVariable( *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + @@ -2768,7 +2773,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDValue ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); + assert(Subtarget->isTargetDarwin() && + "This function expects a Darwin target"); SDLoc DL(Op); // First step is to get the address of the actua global symbol. This is where @@ -2794,7 +2800,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be // silly). auto TRI = - getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo(); + getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); auto ARI = static_cast(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); @@ -2960,6 +2966,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); @@ -2968,10 +2978,6 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); - GlobalAddressSDNode *GA = cast(Op); - if (DAG.getTarget().Options.EmulatedTLS) - return LowerToTLSEmulatedModel(GA, DAG); - TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); switch (model) { @@ -3049,7 +3055,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // This is a win if the constant is only used in one function (so it doesn't // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // We rely on this decision to inline being idemopotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll @@ -3107,7 +3113,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // in multiple functions but it no larger than a pointer. We also check if // GVar has constant (non-ConstantExpr) users. If so, it essentially has its // address taken. 
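In the LowerGlobalTLSAddress hunk above, the point is ordering: the emulated-TLS test now runs before the Darwin path, so -femulated-tls takes effect on every target rather than only on the ELF path that previously contained it. A hypothetical dispatcher with assumed names makes the shape explicit:

#include <cassert>

enum class TLSKind { Emulated, Darwin, ELF };

TLSKind pickTLSLowering(bool EmulatedTLS, bool IsDarwin) {
  if (EmulatedTLS) return TLSKind::Emulated; // checked first, for all targets
  if (IsDarwin)    return TLSKind::Darwin;
  return TLSKind::ELF;                       // "Only ELF implemented here"
}

int main() {
  assert(pickTLSLowering(true, true)   == TLSKind::Emulated);
  assert(pickTLSLowering(false, true)  == TLSKind::Darwin);
  assert(pickTLSLowering(false, false) == TLSKind::ELF);
}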
- if (!allUsersAreInFunction(GVar, F) && + if (!allUsersAreInFunction(GVar, &F) && !(Size <= 4 && allUsersAreInFunctions(GVar))) return SDValue(); @@ -3134,7 +3140,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); } -static bool isReadOnly(const GlobalValue *GV) { +bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { if (const GlobalAlias *GA = dyn_cast(GV)) GV = GA->getBaseObject(); return (isa(GV) && cast(GV)->isConstant()) || @@ -3169,28 +3175,12 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); - - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc dl(Op); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( - GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, - UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier, - /*AddCurrentAddress=*/UseGOT_PREL); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad( - PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); - SDValue Chain = Result.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + UseGOT_PREL ? ARMII::MO_GOT : 0); + SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); if (UseGOT_PREL) Result = - DAG.getLoad(PtrVT, dl, Chain, Result, + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } else if (Subtarget->isROPI() && IsRO) { @@ -3332,7 +3322,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, bool IsPositionIndependent = isPositionIndependent(); unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, + ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -3608,7 +3598,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( SmallVector ArgValues; SDValue ArgValue; - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; // Initially ArgRegsSaveSize is zero. 
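The constant-pool code deleted above (and several later hunks) bias a stored offset by PCAdj = isThumb ? 4 : 8. That constant comes from the architecture: reading PC in ARM state yields the instruction address plus 8, in Thumb state plus 4. A standalone check of the arithmetic, with made-up addresses:

#include <cassert>
#include <cstdint>

uint32_t pcRelTarget(uint32_t InstrAddr, uint32_t StoredOffset, bool IsThumb) {
  uint32_t PCAdj = IsThumb ? 4 : 8; // what a PC read returns, minus InstrAddr
  return InstrAddr + PCAdj + StoredOffset;
}

int main() {
  // To reach 0x8100 from an instruction at 0x8000, the pool entry must hold
  // the target minus the biased PC value.
  assert(pcRelTarget(0x8000, 0x8100 - 0x8008, /*IsThumb=*/false) == 0x8100);
  assert(pcRelTarget(0x8000, 0x8100 - 0x8004, /*IsThumb=*/true) == 0x8100);
}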
@@ -3690,7 +3680,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( DAG.getIntPtrConstant(1, dl)); } else ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); - } else { const TargetRegisterClass *RC; @@ -3733,7 +3722,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( } InVals.push_back(ArgValue); - } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); @@ -3853,6 +3841,12 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } } + } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && + (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { + // In ARM and Thumb-2, the compare instructions can shift their second + // operand. + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); @@ -3952,7 +3946,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, } SDValue -ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { +ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); @@ -3974,6 +3968,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, + SelectionDAG &DAG) { + SDLoc DL(BoolCarry); + EVT CarryVT = BoolCarry.getValueType(); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + // This converts the boolean value carry into the carry flag by doing + // ARMISD::ADDC Carry, ~0 + return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32), + BoolCarry, DAG.getConstant(NegOne, DL, CarryVT)); +} + +static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, + SelectionDAG &DAG) { + SDLoc DL(Flags); + + // Now convert the carry flag into a boolean carry. We do this + // using ARMISD:ADDE 0, 0, Carry + return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), Flags); +} + +SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, + SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc dl(Op); + + EVT VT = Op.getValueType(); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Value; + SDValue Overflow; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::UADDO: + Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + break; + case ISD::USUBO: { + Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow + // value. So compute 1 - C. 
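The "compute 1 - C" step performed just below exists because ARM subtraction sets the carry flag when there is no borrow, while ISD::USUBO wants Overflow = 1 exactly when the subtraction wrapped. A standalone model of the mismatch:

#include <cassert>
#include <cstdint>

struct SubResult { uint32_t Value; unsigned ArmCarry; };

// ARM semantics: SUBS leaves C = 1 unless a borrow occurred.
SubResult armSubs(uint32_t A, uint32_t B) {
  return { A - B, A >= B ? 1u : 0u };
}

int main() {
  SubResult R = armSubs(1, 2);   // borrows, so C is clear
  assert(1 - R.ArmCarry == 1);   // USUBO overflow bit: set
  R = armSubs(2, 1);             // no borrow, C set
  assert(1 - R.ArmCarry == 0);   // USUBO overflow bit: clear
}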
+ Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(1, dl, MVT::i32), Overflow); + break; + } + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4518,7 +4572,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table // which does another jump to the destination. This also makes it easier @@ -4532,7 +4586,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); - Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = @@ -4935,7 +4989,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CCR, CmpLo); - SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, @@ -5370,7 +5423,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). if (Opc == ARMISD::VCEQ) { - SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; @@ -5800,6 +5852,13 @@ static bool isVTBLMask(ArrayRef M, EVT VT) { return VT == MVT::v8i8 && M.size() == 8; } +static unsigned SelectPairHalf(unsigned Elements, ArrayRef Mask, + unsigned Index) { + if (Mask.size() == Elements * 2) + return Index / Elements; + return Mask[Index] == 0 ? 0 : 1; +} + // Checks whether the shuffle mask represents a vector transpose (VTRN) by // checking that pairs of elements in the shuffle mask represent the same index // in each vector, incrementing the expected index by 2 at each step. @@ -5836,10 +5895,7 @@ static bool isVTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) @@ -5866,10 +5922,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 
0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) @@ -5901,10 +5954,7 @@ static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5935,10 +5985,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5978,10 +6025,7 @@ static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6014,10 +6058,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6532,9 +6573,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. -bool -ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, - EVT VT) const { +bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; @@ -7392,6 +7431,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + SDValue Carry = Op.getOperand(2); + EVT CarryVT = Carry.getValueType(); + + SDLoc DL(Op); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + + SDValue Result; + if (Op.getOpcode() == ISD::ADDCARRY) { + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the addition proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. 
+ Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + } else { + // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we + // have to invert the carry first. + Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the subtraction proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + // But the carry returned by ARMISD::SUBE is not a borrow as expected + // by ISD::SUBCARRY, so compute 1 - C. + Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + } + + // Return both values. + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); +} + SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); @@ -7668,9 +7754,9 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; - const auto *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && - F->getReturnType() == LCRTy; + F.getReturnType() == LCRTy; if (IsTC) InChain = TCChain; @@ -7686,6 +7772,7 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); @@ -7746,11 +7833,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: - case ISD::UADDO: case ISD::SSUBO: + return LowerSignedALUO(Op, DAG); + case ISD::UADDO: case ISD::USUBO: - return LowerXALUO(Op, DAG); + return LowerUnsignedALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -7864,7 +7954,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); ARMFunctionInfo *AFI = MF->getInfo(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); @@ -7872,7 +7962,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, unsigned PCLabelId = AFI->createPICLabelUId(); unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; ARMConstantPoolValue *CPV = - ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); + ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); const TargetRegisterClass *TRC = isThumb ? 
&ARM::tGPRRegClass @@ -8158,7 +8248,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8259,7 +8349,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8555,7 +8645,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && + if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; @@ -8661,7 +8751,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. @@ -8797,7 +8887,6 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, switch (TM.getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: - case CodeModel::Default: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) .add(predOps(ARMCC::AL)) @@ -8809,8 +8898,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; - case CodeModel::Large: - case CodeModel::JITDefault: { + case CodeModel::Large: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); @@ -8886,8 +8974,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { + MachineOperand Def(MI.getOperand(1)); + if (TargetRegisterInfo::isPhysicalRegister(Def.getReg())) + Def.setIsRenamable(false); BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) - .add(MI.getOperand(1)) // Rn_wb + .add(Def) // Rn_wb .add(MI.getOperand(2)) // Rn .add(MI.getOperand(3)) // PredImm .add(MI.getOperand(4)) // PredReg @@ -9193,7 +9284,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // - // e.g. ADCS (..., CPSR) -> ADC (... opt:CPSR). + // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR). // Rename pseudo opcodes. 
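The tLDR_postidx rewrite above leans on the equivalence stated in its comment: a Thumb1 post-indexed word load is the same operation as a one-register LDMIA with writeback, since LDMIA advances the base by 4 per loaded register. A standalone model, assuming 4-byte words:

#include <cassert>
#include <cstdint>

// ldr r0, [r1], #4  behaves like  ldmia r1!, {r0}
void ldmiaUpd(const uint32_t *&Base, uint32_t &Dst) {
  Dst = *Base; // load from the current base address
  Base += 1;   // write the post-incremented base back (the "!")
}

int main() {
  uint32_t Mem[2] = {11, 22};
  const uint32_t *P = Mem;
  uint32_t R0 = 0;
  ldmiaUpd(P, R0);
  assert(R0 == 11 && P == Mem + 1);
}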
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); @@ -9612,7 +9703,6 @@ static SDValue findMUL_LOHI(SDValue V) { static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb()) { if (!Subtarget->hasDSP()) return SDValue(); @@ -9701,11 +9791,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, // a S/UMLAL instruction. // UMUL_LOHI // / :lo \ :hi - // / \ [no multiline comment] - // loAdd -> ADDE | - // \ :glue / - // \ / - // ADDC <- hiAdd + // V \ [no multiline comment] + // loAdd -> ADDC | + // \ :carry / + // V V + // ADDE <- hiAdd // assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); @@ -9713,7 +9803,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, AddeNode->getOperand(2).getValueType() == MVT::i32 && "ADDE node has the wrong inputs"); - // Check that we have a glued ADDC node. + // Check that we are chained to the right ADDC node. SDNode* AddcNode = AddeNode->getOperand(2).getNode(); if (AddcNode->getOpcode() != ARMISD::ADDC) return SDValue(); @@ -9764,7 +9854,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; - // Ensure that ADDE is from high result of ISD::SMUL_LOHI. + // Ensure that ADDE is from high result of ISD::xMUL_LOHI. if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) return SDValue(); @@ -9789,6 +9879,12 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, if (!LoMul) return SDValue(); + // If HiAdd is the same node as ADDC or is a predecessor of ADDC the + // replacement below will create a cycle. + if (AddcNode == HiAdd->getNode() || + AddcNode->isPredecessorOf(HiAdd->getNode())) + return SDValue(); + // Create the merged node. SelectionDAG &DAG = DCI.DAG; @@ -9852,7 +9948,6 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && isNullConstant(AddeNode->getOperand(1)))) { - SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), UmlalNode->getOperand(2), AddHi }; @@ -9891,13 +9986,27 @@ static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, +static SDValue PerformAddcSubcCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + SelectionDAG &DAG(DCI.DAG); + + if (N->getOpcode() == ARMISD::ADDC) { + // (ADDC (ADDE 0, 0, C), -1) -> C + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS->getOpcode() == ARMISD::ADDE && + isNullConstant(LHS->getOperand(0)) && + isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) { + return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); + } + } + if (Subtarget->isThumb1Only()) { SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast(RHS)) { int32_t imm = C->getSExtValue(); - if (imm < 0 && imm > INT_MIN) { + if (imm < 0 && imm > std::numeric_limits::min()) { SDLoc DL(N); RHS = DAG.getConstant(-imm, DL, MVT::i32); unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC @@ -9974,6 +10083,102 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } +static SDValue PerformSHLSimplify(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + // Allow the generic combiner to identify potential bswaps. 
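Earlier in this hunk, PerformAddcSubcCombine flips an add of a negative immediate into a subtract of its negation, guarded by imm > std::numeric_limits<int>::min() because -INT_MIN is not representable in 32 bits. The guard, standalone:

#include <cassert>
#include <climits>
#include <cstdint>

bool canFoldAddImmToSub(int32_t Imm) {
  return Imm < 0 && Imm > INT_MIN; // -Imm must fit in int32_t
}

int main() {
  assert(canFoldAddImmToSub(-42));      // adds #-42 -> subs #42
  assert(!canFoldAddImmToSub(INT_MIN)); // 0x80000000: negation overflows
  assert(!canFoldAddImmToSub(5));       // already positive, nothing to fold
}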
+ if (DCI.isBeforeLegalize()) + return SDValue(); + + // DAG combiner will fold: + // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2 + // Other code patterns that can be also be modified have the following form: + // b + ((a << 1) | 510) + // b + ((a << 1) & 510) + // b + ((a << 1) ^ 510) + // b + ((a << 1) + 510) + + // Many instructions can perform the shift for free, but it requires both + // the operands to be registers. If c1 << c2 is too large, a mov immediate + // instruction will needed. So, unfold back to the original pattern if: + // - if c1 and c2 are small enough that they don't require mov imms. + // - the user(s) of the node can perform an shl + + // No shifted operands for 16-bit instructions. + if (ST->isThumb() && ST->isThumb1Only()) + return SDValue(); + + // Check that all the users could perform the shl themselves. + for (auto U : N->uses()) { + switch(U->getOpcode()) { + default: + return SDValue(); + case ISD::SUB: + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SETCC: + case ARMISD::CMP: + // Check that its not already using a shl. + if (U->getOperand(0).getOpcode() == ISD::SHL || + U->getOperand(1).getOpcode() == ISD::SHL) + return SDValue(); + break; + } + } + + if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR && + N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::SHL) + return SDValue(); + + SDValue SHL = N->getOperand(0); + + auto *C1ShlC2 = dyn_cast(N->getOperand(1)); + auto *C2 = dyn_cast(SHL.getOperand(1)); + if (!C1ShlC2 || !C2) + return SDValue(); + + DEBUG(dbgs() << "Trying to simplify shl: "; N->dump()); + + APInt C2Int = C2->getAPIntValue(); + APInt C1Int = C1ShlC2->getAPIntValue(); + + // Check that performing a lshr will not lose any information. + APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(), + C2Int.getBitWidth() - C2->getZExtValue()); + if ((C1Int & Mask) != C1Int) + return SDValue(); + + // Shift the first constant. + C1Int.lshrInPlace(C2Int); + + // The immediates are encoded as an 8-bit value that can be rotated. + unsigned Zeros = C1Int.countLeadingZeros() + C1Int.countTrailingZeros(); + if (C1Int.getBitWidth() - Zeros > 8) + return SDValue(); + + Zeros = C2Int.countLeadingZeros() + C2Int.countTrailingZeros(); + if (C2Int.getBitWidth() - Zeros > 8) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + SDValue X = SHL.getOperand(0); + SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X, + DAG.getConstant(C1Int, dl, MVT::i32)); + // Shift left to compensate for the lshr of C1Int. + SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1)); + + DAG.ReplaceAllUsesWith(SDValue(N, 0), Res); + return SDValue(N, 0); +} + + /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, @@ -9982,6 +10187,10 @@ static SDValue PerformADDCombine(SDNode *N, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + // Only works one way, because it needs an immediate operand. + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; + // First try with the default operand order. 
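PerformSHLSimplify's immediate test above (bit width minus leading and trailing zeros must not exceed 8) is a compact stand-in for "cheap ARM immediate": all set bits fit one 8-bit window that a rotation can reach. A standalone version over uint32_t, using GCC/Clang builtins in place of the APInt calls; 510 is the constant from the b + ((a << 1) | 510) examples above:

#include <cassert>
#include <cstdint>

unsigned clz32(uint32_t V) { return V ? __builtin_clz(V) : 32; }
unsigned ctz32(uint32_t V) { return V ? __builtin_ctz(V) : 32; }

bool fitsIn8BitSpan(uint32_t V) {
  int Span = 32 - int(clz32(V)) - int(ctz32(V));
  return Span <= 8; // all set bits live in one 8-bit window
}

int main() {
  assert(fitsIn8BitSpan(510));        // 0x1FE = 0xFF << 1
  assert(!fitsIn8BitSpan(511));       // 0x1FF needs nine significant bits
  assert(fitsIn8BitSpan(0xFF000000)); // a shifted 8-bit pattern still fits
}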
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; @@ -10121,7 +10330,6 @@ static SDValue PerformMULCombine(SDNode *N, MVT::i32))); Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i32), Res); - } else return SDValue(); } @@ -10171,6 +10379,9 @@ static SDValue PerformANDCombine(SDNode *N, // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; } return SDValue(); @@ -10237,95 +10448,17 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR, return SDValue(OR, 0); } -/// PerformORCombine - Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VORR - BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); - SDLoc dl(N); - EVT VT = N->getValueType(0); - SelectionDAG &DAG = DCI.DAG; - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, - DAG, dl, VorrVT, VT.is128BitVector(), - OtherModImm); - if (Val.getNode()) { - SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); - SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); - } - } - } - - if (!Subtarget->isThumb1Only()) { - // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) - if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) - return Result; - if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) - return Result; - } - - // The code below optimizes (or (and X, Y), Z). - // The AND operand needs to have a single user to make these optimizations - // profitable. - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - SDValue N1 = N->getOperand(1); - - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. - if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - APInt SplatBits0, SplatBits1; - BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); - BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); - // Ensure that the second operand of both ands are constants - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - // Ensure that the bit width of the constants are the same and that - // the splat arguments are logical inverses as per the pattern we - // are trying to simplify. - if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection - // simpler. - EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), - N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } - } - } - } - - // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when - // reasonable. - +static SDValue PerformORCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val @@ -10367,9 +10500,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask @@ -10393,9 +10527,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, @@ -10409,9 +10544,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } } @@ -10429,10 +10565,109 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); + // Return value from the original node to inform the combiner than N is + // now dead. 
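The first pattern handled above, or (and A, mask), val with val confined to the bits that mask clears, is exactly a bitfield insert: keep A where mask is set, drop val into the hole. A standalone scalar model (the real combine also normalizes val's position within the field):

#include <cassert>
#include <cstdint>

uint32_t bfiLike(uint32_t A, uint32_t Val, uint32_t Mask) {
  return (A & Mask) | Val; // valid when (Val & Mask) == 0
}

int main() {
  uint32_t A    = 0xAABBCCDD;
  uint32_t Mask = 0xFFFF00FF; // clears bits 8..15 of A
  uint32_t Val  = 0x00002400; // lives entirely inside the cleared field
  assert((Val & Mask) == 0);
  assert(bfiLike(A, Val, Mask) == 0xAABB24DD);
}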
+ return SDValue(N, 0); + } + + return SDValue(); +} + +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); + SDLoc dl(N); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, dl, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } } + if (!Subtarget->isThumb1Only()) { + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) + return Result; + if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) + return Result; + } + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + APInt SplatBits0, SplatBits1; + BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } + } + + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. 
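The VBSL pattern matched above is a bitwise select: with a constant A, (B & A) | (C & ~A) takes B's bit wherever A is 1 and C's bit wherever A is 0, which is what NEON's VBSL computes in one instruction. Standalone check:

#include <cassert>
#include <cstdint>

uint32_t vbsl(uint32_t A, uint32_t B, uint32_t C) {
  return (B & A) | (C & ~A);
}

int main() {
  assert(vbsl(0xFFFF0000, 0x12345678, 0x9ABCDEF0) == 0x1234DEF0);
  assert(vbsl(0, 0x12345678, 0x9ABCDEF0) == 0x9ABCDEF0); // A = 0 selects C
}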
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget)) + return Res; + } + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; + return SDValue(); } @@ -10449,6 +10684,9 @@ static SDValue PerformXORCombine(SDNode *N, // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; } return SDValue(); @@ -11781,6 +12019,14 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static const APInt *isPowerOf2Constant(SDValue V) { + ConstantSDNode *C = dyn_cast(V); + if (!C) + return nullptr; + const APInt *CV = &C->getAPIntValue(); + return CV->isPowerOf2() ? CV : nullptr; +} + SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11809,8 +12055,8 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); - ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); + if (!AndC) return SDValue(); SDValue X = And->getOperand(0); @@ -11850,7 +12096,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); - unsigned BitInX = AndC->getAPIntValue().logBase2(); + unsigned BitInX = AndC->logBase2(); if (BitInX != 0) { // We must shift X first. @@ -12011,7 +12257,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::ADDC: - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); @@ -12171,11 +12417,11 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + !F.hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || @@ -12193,6 +12439,26 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::Other; } +// 64-bit integers are split into their high and low parts and held in two +// different registers, so the trunc is free since the low register can just +// be used. 
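The comment above (and the isTruncateFree overloads that follow) encode a register-level fact: an i64 already lives as a lo/hi pair of 32-bit registers, so truncating to i32 means using the lo register and costs no instruction. A standalone illustration:

#include <cassert>
#include <cstdint>

struct I64InRegs { uint32_t Lo, Hi; }; // how ARM holds an i64, e.g. r0:r1

uint32_t truncToI32(const I64InRegs &V) { return V.Lo; } // free projection

int main() {
  I64InRegs V = {0xDDCCBBAA, 0x11223344}; // 0x11223344DDCCBBAA
  assert(truncToI32(V) == 0xDDCCBBAA);
  assert(uint32_t(0x11223344DDCCBBAAull) == 0xDDCCBBAA);
}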
+bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); + unsigned DestBits = DstTy->getPrimitiveSizeInBits(); + return (SrcBits == 64 && DestBits == 32); +} + +bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { + if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || + !DstVT.isInteger()) + return false; + unsigned SrcBits = SrcVT.getSizeInBits(); + unsigned DestBits = DstVT.getSizeInBits(); + return (SrcBits == 64 && DestBits == 32); +} + bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; @@ -12261,7 +12527,6 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, return -1; } - static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -12377,8 +12642,13 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, Scale = Scale & ~1; return Scale == 2 || Scale == 4 || Scale == 8; case MVT::i64: + // FIXME: What are we trying to model here? ldrd doesn't have an r + r + // version in Thumb mode. // r + r - if (((unsigned)AM.HasBaseReg + Scale) <= 2) + if (Scale == 1) + return true; + // r * 2 (this can be lowered to r + r). + if (!AM.HasBaseReg && Scale == 2) return true; return false; case MVT::isVoid: @@ -12392,11 +12662,26 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, } } +bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM, + EVT VT) const { + const int Scale = AM.Scale; + + // Negative scales are not supported in Thumb1. + if (Scale < 0) + return false; + + // Thumb1 addressing modes do not support register scaling excepting the + // following cases: + // 1. Scale == 1 means no scaling. + // 2. Scale == 2 this can be lowered to r + r if there is no base register. + return (Scale == 1) || (!AM.HasBaseReg && Scale == 2); +} + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const { + unsigned AS, Instruction *I) const { EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -12408,10 +12693,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; - case 1: - if (Subtarget->isThumb1Only()) - return false; - LLVM_FALLTHROUGH; default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) @@ -12420,6 +12701,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!VT.isSimple()) return false; + if (Subtarget->isThumb1Only()) + return isLegalT1ScaledAddressingMode(AM, VT); + if (Subtarget->isThumb2()) return isLegalT2ScaledAddressingMode(AM, VT); @@ -12436,8 +12720,11 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, return isPowerOf2_32(Scale & ~1); case MVT::i16: case MVT::i64: - // r + r - if (((unsigned)AM.HasBaseReg + Scale) <= 2) + // r +/- r + if (Scale == 1 || (AM.HasBaseReg && Scale == -1)) + return true; + // r * 2 (this can be lowered to r + r). 
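The new isLegalT1ScaledAddressingMode earlier in this hunk admits exactly two scaled forms, mirrored here standalone: r + r (Scale == 1), and a lone r * 2 with no base register, which the backend can re-express as r + r. A sketch reusing the same field names:

#include <cassert>

struct AddrMode { bool HasBaseReg; int Scale; };

bool isLegalT1Scaled(const AddrMode &AM) {
  if (AM.Scale < 0)
    return false; // negative scales are not supported in Thumb1
  return AM.Scale == 1 || (!AM.HasBaseReg && AM.Scale == 2);
}

int main() {
  assert(isLegalT1Scaled({true, 1}));   // r + r
  assert(isLegalT1Scaled({false, 2}));  // r*2 alone -> lowered as r + r
  assert(!isLegalT1Scaled({true, 2}));  // base + r*2 is not encodable
  assert(!isLegalT1Scaled({true, -1})); // r - r
}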
+ if (!AM.HasBaseReg && Scale == 2) return true; return false; @@ -12685,10 +12972,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: - // These nodes' second result is a boolean - if (Op.getResNo() == 0) - break; - Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + // Special cases when we convert a carry to a boolean. + if (Op.getResNo() == 0) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // (ADDE 0, 0, C) will give us a single bit. + if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) && + isNullConstant(RHS)) { + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + } + } break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. @@ -12848,7 +13142,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( return weight; } -typedef std::pair RCPair; +using RCPair = std::pair; + RCPair ARMTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { @@ -12887,7 +13182,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::i32) return RCPair(0U, &ARM::SPRRegClass); break; } @@ -13293,6 +13588,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: @@ -13311,9 +13607,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; + // volatile loads with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::arm_neon_vst1: @@ -13338,9 +13633,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast(AlignArg)->getZExtValue(); - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; + // volatile stores with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::arm_ldaex: @@ -13352,9 +13646,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: @@ -13366,9 +13658,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlexd: @@ -13378,9 +13668,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(2); Info.offset = 0; 
Info.align = 8; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; case Intrinsic::arm_ldaexd: @@ -13390,9 +13678,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; default: @@ -13414,7 +13700,7 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, +bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; @@ -13650,7 +13936,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) - std::swap (Lo, Hi); + std::swap(Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Strex, {Lo, Hi, Addr}); } @@ -13772,7 +14058,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( DenseMap> SubVecs; for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { - // If we're generating more than one load, compute the base address of // subsequent loads as an offset from the previous. if (LoadCount > 0) @@ -13913,7 +14198,6 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Intrinsic::arm_neon_vst4}; for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { - // If we generating more than one store, we compute the base address of // subsequent stores as an offset from the previous. if (StoreCount > 0) @@ -14080,7 +14364,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); -- cgit v1.2.3
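One more detail from the tail of the patch: emitStoreConditional splits a 64-bit value into 32-bit halves for strexd and swaps them on big-endian targets so the register pair matches memory order. A standalone model of that split:

#include <cassert>
#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> splitForStrexd(uint64_t Val, bool IsLittle) {
  uint32_t Lo = uint32_t(Val);       // CreateTrunc(Val, i32)
  uint32_t Hi = uint32_t(Val >> 32); // CreateTrunc(LShr(Val, 32), i32)
  if (!IsLittle)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}

int main() {
  auto LoHi = splitForStrexd(0x1122334455667788ull, /*IsLittle=*/true);
  assert(LoHi.first == 0x55667788 && LoHi.second == 0x11223344);
}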