author    Dimitry Andric <dim@FreeBSD.org>  2017-12-18 20:10:56 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-12-18 20:10:56 +0000
commit    044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree      1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/Target/ARM/ARMISelLowering.cpp
parent    eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
 -rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 800
 1 file changed, 542 insertions(+), 258 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 27dda93387b6f..1b4d7ff508489 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -56,6 +57,11 @@
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
@@ -93,7 +99,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
@@ -221,19 +226,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
RegInfo = Subtarget->getRegisterInfo();
Itins = Subtarget->getInstrItineraryData();
+ setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
!Subtarget->isTargetWatchOS()) {
- const auto &E = Subtarget->getTargetTriple().getEnvironment();
-
- bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
- E == Triple::MuslEABIHF;
- // Windows is a special case. Technically, we will replace all of the "GNU"
- // calls with calls to MSVCRT if appropriate and adjust the calling
- // convention then.
- IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
-
+ bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
IsHFTarget ? CallingConv::ARM_AAPCS_VFP
@@ -801,6 +799,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
+ setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
@@ -1562,7 +1563,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
bool isVarArg) const {
switch (CC) {
default:
- llvm_unreachable("Unsupported calling convention");
+ report_fatal_error("Unsupported calling convention");
case CallingConv::ARM_AAPCS:
case CallingConv::ARM_APCS:
case CallingConv::GHC:
@@ -1611,7 +1612,7 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
bool isVarArg) const {
switch (getEffectiveCallingConv(CC, isVarArg)) {
default:
- llvm_unreachable("Unsupported calling convention");
+ report_fatal_error("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
case CallingConv::ARM_AAPCS:
@@ -1634,7 +1635,6 @@ SDValue ARMTargetLowering::LowerCallResult(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
-
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
@@ -1732,7 +1732,6 @@ void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue &StackPtr,
SmallVectorImpl<SDValue> &MemOpChains,
ISD::ArgFlagsTy Flags) const {
-
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
unsigned id = Subtarget->isLittle() ? 0 : 1;
@@ -1774,7 +1773,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
- auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+ auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
@@ -1783,9 +1782,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
+ isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
- if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
@@ -1982,7 +1981,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isDirect = false;
const TargetMachine &TM = getTargetMachine();
- const Module *Mod = MF.getFunction()->getParent();
+ const Module *Mod = MF.getFunction().getParent();
const GlobalValue *GV = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
GV = G->getGlobal();
@@ -2032,9 +2031,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- auto *BB = CLI.CS->getParent();
+ auto *BB = CLI.CS.getParent();
bool PreferIndirect =
- Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
+ Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
@@ -2106,7 +2105,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
// Emit regular call when code size is the priority
- !MF.getFunction()->optForMinSize())
+ !MF.getFunction().optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -2281,18 +2280,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const Function *CallerF = MF.getFunction();
- CallingConv::ID CallerCC = CallerF->getCallingConv();
+ const Function &CallerF = MF.getFunction();
+ CallingConv::ID CallerCC = CallerF.getCallingConv();
assert(Subtarget->supportsTailCall());
+ // Tail calls to function pointers cannot be optimized for Thumb1 if the args
+ // to the call take up r0-r3. The reason is that there are no legal registers
+ // left to hold the pointer to the function to be called.
+ if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
+ !isa<GlobalAddressSDNode>(Callee.getNode()))
+ return false;
+
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
- if (CallerF->hasFnAttribute("interrupt"))
+ if (CallerF.hasFnAttribute("interrupt"))
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
@@ -2404,9 +2410,9 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
const SDLoc &DL, SelectionDAG &DAG) {
const MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
- StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+ StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
// See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
// version of the "preferred return address". These offsets affect the return
@@ -2440,7 +2446,6 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
@@ -2548,7 +2553,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
//
// M-class CPUs actually use a normal return sequence with a special
// (hardware-provided) value in LR, so the normal code path works.
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
!Subtarget->isMClass()) {
if (Subtarget->isThumb1Only())
report_fatal_error("interrupt attribute is not supported in Thumb1");
@@ -2686,7 +2691,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
auto T = const_cast<Type*>(CP->getType());
auto C = const_cast<Constant*>(CP->getConstVal());
auto M = const_cast<Module*>(DAG.getMachineFunction().
- getFunction()->getParent());
+ getFunction().getParent());
auto GV = new GlobalVariable(
*M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
@@ -2768,7 +2773,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
- assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ assert(Subtarget->isTargetDarwin() &&
+ "This function expects a Darwin target");
SDLoc DL(Op);
// First step is to get the address of the actual global symbol. This is where
@@ -2794,7 +2800,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
// trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
// silly).
auto TRI =
- getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
@@ -2960,6 +2966,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
@@ -2968,10 +2978,6 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
- GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- if (DAG.getTarget().Options.EmulatedTLS)
- return LowerToTLSEmulatedModel(GA, DAG);
-
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
switch (model) {
@@ -3049,7 +3055,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// This is a win if the constant is only used in one function (so it doesn't
// need to be duplicated) or duplicating the constant wouldn't increase code
// size (implying the constant is no larger than 4 bytes).
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
// We rely on this decision to inline being idempotent and unrelated to the
// use-site. We know that if we inline a variable at one use site, we'll
@@ -3107,7 +3113,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
// in multiple functions but is no larger than a pointer. We also check if
// GVar has constant (non-ConstantExpr) users. If so, it essentially has its
// address taken.
- if (!allUsersAreInFunction(GVar, F) &&
+ if (!allUsersAreInFunction(GVar, &F) &&
!(Size <= 4 && allUsersAreInFunctions(GVar)))
return SDValue();
@@ -3134,7 +3140,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
-static bool isReadOnly(const GlobalValue *GV) {
+bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->getBaseObject();
return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
@@ -3169,28 +3175,12 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
-
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc dl(Op);
- unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
- GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
- UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
- /*AddCurrentAddress=*/UseGOT_PREL);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(
- PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
- SDValue Chain = Result.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ UseGOT_PREL ? ARMII::MO_GOT : 0);
+ SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
if (UseGOT_PREL)
Result =
- DAG.getLoad(PtrVT, dl, Chain, Result,
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
} else if (Subtarget->isROPI() && IsRO) {
@@ -3332,7 +3322,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
bool IsPositionIndependent = isPositionIndependent();
unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
+ ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -3608,7 +3598,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
- Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
// Initially ArgRegsSaveSize is zero.
@@ -3690,7 +3680,6 @@ SDValue ARMTargetLowering::LowerFormalArguments(
DAG.getIntPtrConstant(1, dl));
} else
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
-
} else {
const TargetRegisterClass *RC;
@@ -3733,7 +3722,6 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
InVals.push_back(ArgValue);
-
} else { // VA.isRegLoc()
// sanity check
assert(VA.isMemLoc());
@@ -3853,6 +3841,12 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
}
+ } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
+ (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
+ // In ARM and Thumb-2, the compare instructions can shift their second
+ // operand.
+ CC = ISD::getSetCCSwappedOperands(CC);
+ std::swap(LHS, RHS);
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
@@ -3952,7 +3946,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
}
SDValue
-ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
@@ -3974,6 +3968,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
+static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
+ SelectionDAG &DAG) {
+ SDLoc DL(BoolCarry);
+ EVT CarryVT = BoolCarry.getValueType();
+
+ APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+ // This converts the boolean value carry into the carry flag by doing
+ // ARMISD::ADDC Carry, ~0
+ return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
+ BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
+}
+
+static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
+ SelectionDAG &DAG) {
+ SDLoc DL(Flags);
+
+ // Now convert the carry flag into a boolean carry. We do this
+ // using ARMISD::ADDE 0, 0, Carry
+ return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32), Flags);
+}
+
+SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ EVT VT = Op.getValueType();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ SDValue Value;
+ SDValue Overflow;
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::UADDO:
+ Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
+ // Convert the carry flag into a boolean value.
+ Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+ break;
+ case ISD::USUBO: {
+ Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
+ // Convert the carry flag into a boolean value.
+ Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+ // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
+ // value. So compute 1 - C.
+ Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(1, dl, MVT::i32), Overflow);
+ break;
+ }
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
+}
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
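The two helpers added above lean on a simple arithmetic identity: adding ~0 (all ones) to a 0/1 value produces a carry-out equal to that value, and an add-with-carry of 0 + 0 recovers the flag as a 0/1 integer. A minimal standalone sketch of that identity (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

// Boolean 0/1 -> carry flag, the effect of ARMISD::ADDC b, ~0.
static uint32_t boolToCarryFlag(uint32_t b) {
  uint64_t wide = uint64_t(b) + 0xFFFFFFFFull;
  return uint32_t(wide >> 32);            // carry-out bit of the 32-bit add
}

// Carry flag -> boolean 0/1, the effect of ARMISD::ADDE 0, 0, C.
static uint32_t carryFlagToBool(uint32_t carry) {
  return 0u + 0u + carry;
}

int main() {
  for (uint32_t b = 0; b <= 1; ++b)
    assert(carryFlagToBool(boolToCarryFlag(b)) == b);  // lossless round trip
}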
@@ -4518,7 +4572,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
- SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
// Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table
// which does another jump to the destination. This also makes it easier
@@ -4532,7 +4586,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
Chain = Addr.getValue(1);
- Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
+ Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
Addr =
@@ -4935,7 +4989,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
ARMcc, CCR, CmpLo);
-
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
? DAG.getNode(Opc, dl, VT, ShOpHi,
@@ -5370,7 +5423,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
if (Opc == ARMISD::VCEQ) {
-
SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0;
@@ -5800,6 +5852,13 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
return VT == MVT::v8i8 && M.size() == 8;
}
+static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
+ unsigned Index) {
+ if (Mask.size() == Elements * 2)
+ return Index / Elements;
+ return Mask[Index] == 0 ? 0 : 1;
+}
+
// Checks whether the shuffle mask represents a vector transpose (VTRN) by
// checking that pairs of elements in the shuffle mask represent the same index
// in each vector, incrementing the expected index by 2 at each step.
@@ -5836,10 +5895,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
// element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
// M[0] is used to determine WhichResult
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
@@ -5866,10 +5922,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
(M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
@@ -5901,10 +5954,7 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
for (unsigned j = 0; j < NumElts; ++j) {
if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
return false;
@@ -5935,10 +5985,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned Half = NumElts / 2;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
for (unsigned j = 0; j < NumElts; j += Half) {
unsigned Idx = WhichResult;
for (unsigned k = 0; k < Half; ++k) {
@@ -5978,10 +6025,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -6014,10 +6058,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
- if (M.size() == NumElts * 2)
- WhichResult = i / NumElts;
- else
- WhichResult = M[i] == 0 ? 0 : 1;
+ WhichResult = SelectPairHalf(NumElts, M, i);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned j = 0; j < NumElts; j += 2) {
if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
@@ -6532,9 +6573,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
-bool
-ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
- EVT VT) const {
+bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
@@ -7392,6 +7431,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+ SDNode *N = Op.getNode();
+ EVT VT = N->getValueType(0);
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ SDValue Carry = Op.getOperand(2);
+ EVT CarryVT = Carry.getValueType();
+
+ SDLoc DL(Op);
+
+ APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+
+ SDValue Result;
+ if (Op.getOpcode() == ISD::ADDCARRY) {
+ // This converts the boolean value carry into the carry flag.
+ Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+ // Do the addition proper using the carry flag we wanted.
+ Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
+ Op.getOperand(1), Carry.getValue(1));
+
+ // Now convert the carry flag into a boolean value.
+ Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+ } else {
+ // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
+ // have to invert the carry first.
+ Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), Carry);
+ // This converts the boolean value carry into the carry flag.
+ Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+ // Do the subtraction proper using the carry flag we wanted.
+ Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
+ Op.getOperand(1), Carry.getValue(1));
+
+ // Now convert the carry flag into a boolean value.
+ Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+ // But the carry returned by ARMISD::SUBE is not a borrow as expected
+ // by ISD::SUBCARRY, so compute 1 - C.
+ Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(1, DL, MVT::i32), Carry);
+ }
+
+ // Return both values.
+ return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
+}
+
SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin());
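The ISD::SUBCARRY half of LowerADDSUBCARRY above bridges two conventions: the generic node carries a borrow (1 means the subtraction wrapped), while ARM's SBC consumes a carry flag that is the inverse of a borrow, hence the two 1 - C conversions. A small self-contained sketch of that relationship (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t pairs[2][2] = {{5, 7}, {9, 3}};   // one wrapping, one not
  for (const auto &p : pairs) {
    for (uint32_t borrowIn = 0; borrowIn <= 1; ++borrowIn) {
      // ISD::SUBCARRY view: a result plus a borrow-out bit.
      uint64_t wide = uint64_t(p[0]) - uint64_t(p[1]) - uint64_t(borrowIn);
      uint32_t borrowOut = uint32_t(wide >> 32) & 1;
      // ARM SBC view: the same subtraction written with a carry, C = 1 - borrow.
      uint32_t carryIn = 1 - borrowIn;
      uint64_t armWide = uint64_t(p[0]) - uint64_t(p[1]) - uint64_t(1 - carryIn);
      uint32_t carryOut = (uint32_t(armWide >> 32) & 1) ? 0u : 1u;
      assert(uint32_t(wide) == uint32_t(armWide)); // identical result value
      assert(borrowOut == 1 - carryOut);           // the 1 - C conversion above
    }
  }
}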
@@ -7668,9 +7754,9 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
SDValue InChain = DAG.getEntryNode();
SDValue TCChain = InChain;
- const auto *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
- F->getReturnType() == LCRTy;
+ F.getReturnType() == LCRTy;
if (IsTC)
InChain = TCChain;
@@ -7686,6 +7772,7 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
@@ -7746,11 +7833,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::SADDO:
- case ISD::UADDO:
case ISD::SSUBO:
+ return LowerSignedALUO(Op, DAG);
+ case ISD::UADDO:
case ISD::USUBO:
- return LowerXALUO(Op, DAG);
+ return LowerUnsignedALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -7864,7 +7954,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineRegisterInfo *MRI = &MF->getRegInfo();
MachineConstantPool *MCP = MF->getConstantPool();
ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
- const Function *F = MF->getFunction();
+ const Function &F = MF->getFunction();
bool isThumb = Subtarget->isThumb();
bool isThumb2 = Subtarget->isThumb2();
@@ -7872,7 +7962,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
unsigned PCLabelId = AFI->createPICLabelUId();
unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
ARMConstantPoolValue *CPV =
- ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+ ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
@@ -8158,7 +8248,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
@@ -8259,7 +8349,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
// MachineConstantPool wants an explicit alignment.
@@ -8555,7 +8645,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
- if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) &&
+ if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
@@ -8661,7 +8751,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
.add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
- Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
// MachineConstantPool wants an explicit alignment.
@@ -8797,7 +8887,6 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
switch (TM.getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
- case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
.add(predOps(ARMCC::AL))
@@ -8809,8 +8898,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
.addReg(ARM::CPSR,
RegState::Implicit | RegState::Define | RegState::Dead);
break;
- case CodeModel::Large:
- case CodeModel::JITDefault: {
+ case CodeModel::Large: {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
@@ -8886,8 +8974,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
+ MachineOperand Def(MI.getOperand(1));
+ if (TargetRegisterInfo::isPhysicalRegister(Def.getReg()))
+ Def.setIsRenamable(false);
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
- .add(MI.getOperand(1)) // Rn_wb
+ .add(Def) // Rn_wb
.add(MI.getOperand(2)) // Rn
.add(MI.getOperand(3)) // PredImm
.add(MI.getOperand(4)) // PredReg
@@ -9193,7 +9284,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
- // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+ // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
@@ -9612,7 +9703,6 @@ static SDValue findMUL_LOHI(SDValue V) {
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
if (Subtarget->isThumb()) {
if (!Subtarget->hasDSP())
return SDValue();
@@ -9701,11 +9791,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
// a S/UMLAL instruction.
// UMUL_LOHI
// / :lo \ :hi
- // / \ [no multiline comment]
- // loAdd -> ADDE |
- // \ :glue /
- // \ /
- // ADDC <- hiAdd
+ // V \ [no multiline comment]
+ // loAdd -> ADDC |
+ // \ :carry /
+ // V V
+ // ADDE <- hiAdd
//
assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
@@ -9713,7 +9803,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
AddeNode->getOperand(2).getValueType() == MVT::i32 &&
"ADDE node has the wrong inputs");
- // Check that we have a glued ADDC node.
+ // Check that we are chained to the right ADDC node.
SDNode* AddcNode = AddeNode->getOperand(2).getNode();
if (AddcNode->getOpcode() != ARMISD::ADDC)
return SDValue();
@@ -9764,7 +9854,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
SDValue* LoMul = nullptr;
SDValue* LowAdd = nullptr;
- // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
+ // Ensure that ADDE is from high result of ISD::xMUL_LOHI.
if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
return SDValue();
@@ -9789,6 +9879,12 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
if (!LoMul)
return SDValue();
+ // If HiAdd is the same node as ADDC or is a predecessor of ADDC the
+ // replacement below will create a cycle.
+ if (AddcNode == HiAdd->getNode() ||
+ AddcNode->isPredecessorOf(HiAdd->getNode()))
+ return SDValue();
+
// Create the merged node.
SelectionDAG &DAG = DCI.DAG;
@@ -9852,7 +9948,6 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
isNullConstant(AddeNode->getOperand(1)))) {
-
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
UmlalNode->getOperand(2), AddHi };
@@ -9891,13 +9986,27 @@ static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformAddcSubcCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG(DCI.DAG);
+
+ if (N->getOpcode() == ARMISD::ADDC) {
+ // (ADDC (ADDE 0, 0, C), -1) -> C
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS->getOpcode() == ARMISD::ADDE &&
+ isNullConstant(LHS->getOperand(0)) &&
+ isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
+ return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
+ }
+ }
+
if (Subtarget->isThumb1Only()) {
SDValue RHS = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
int32_t imm = C->getSExtValue();
- if (imm < 0 && imm > INT_MIN) {
+ if (imm < 0 && imm > std::numeric_limits<int>::min()) {
SDLoc DL(N);
RHS = DAG.getConstant(-imm, DL, MVT::i32);
unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
@@ -9974,6 +10083,102 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue PerformSHLSimplify(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *ST) {
+ // Allow the generic combiner to identify potential bswaps.
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
+ // DAG combiner will fold:
+ // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2
+ // Other code patterns that can be also be modified have the following form:
+ // b + ((a << 1) | 510)
+ // b + ((a << 1) & 510)
+ // b + ((a << 1) ^ 510)
+ // b + ((a << 1) + 510)
+
+ // Many instructions can perform the shift for free, but this requires both
+ // operands to be registers. If c1 << c2 is too large, a mov immediate
+ // instruction will be needed. So, unfold back to the original pattern if:
+ // - c1 and c2 are small enough that they don't require mov imms.
+ // - the user(s) of the node can perform a shl.
+
+ // No shifted operands for 16-bit instructions.
+ if (ST->isThumb() && ST->isThumb1Only())
+ return SDValue();
+
+ // Check that all the users could perform the shl themselves.
+ for (auto U : N->uses()) {
+ switch(U->getOpcode()) {
+ default:
+ return SDValue();
+ case ISD::SUB:
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SETCC:
+ case ARMISD::CMP:
+ // Check that it's not already using a shl.
+ if (U->getOperand(0).getOpcode() == ISD::SHL ||
+ U->getOperand(1).getOpcode() == ISD::SHL)
+ return SDValue();
+ break;
+ }
+ }
+
+ if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
+ N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::SHL)
+ return SDValue();
+
+ SDValue SHL = N->getOperand(0);
+
+ auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+ if (!C1ShlC2 || !C2)
+ return SDValue();
+
+ DEBUG(dbgs() << "Trying to simplify shl: "; N->dump());
+
+ APInt C2Int = C2->getAPIntValue();
+ APInt C1Int = C1ShlC2->getAPIntValue();
+
+ // Check that performing a lshr will not lose any information.
+ APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(),
+ C2Int.getBitWidth() - C2->getZExtValue());
+ if ((C1Int & Mask) != C1Int)
+ return SDValue();
+
+ // Shift the first constant.
+ C1Int.lshrInPlace(C2Int);
+
+ // The immediates are encoded as an 8-bit value that can be rotated.
+ unsigned Zeros = C1Int.countLeadingZeros() + C1Int.countTrailingZeros();
+ if (C1Int.getBitWidth() - Zeros > 8)
+ return SDValue();
+
+ Zeros = C2Int.countLeadingZeros() + C2Int.countTrailingZeros();
+ if (C2Int.getBitWidth() - Zeros > 8)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ SDValue X = SHL.getOperand(0);
+ SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
+ DAG.getConstant(C1Int, dl, MVT::i32));
+ // Shift left to compensate for the lshr of C1Int.
+ SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
+
+ DAG.ReplaceAllUsesWith(SDValue(N, 0), Res);
+ return SDValue(N, 0);
+}
+
+
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
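PerformSHLSimplify above undoes the combiner's (shl (op x, c1), c2) -> (op (shl x, c2), c1 << c2) canonicalization so the shift can instead be folded into the user instruction, but only when shifting the combined constant back right by c2 loses no bits and both constants still encode as rotated 8-bit immediates. A tiny standalone check of the value identity behind the rewrite, with constants chosen here purely for illustration (not taken from the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Combiner-canonical form: (a << 1) | 510. The combine above rewrites it to
  // (a | 255) << 1, which is valid because 510 == 255 << 1 (no low bits lost)
  // and both 255 and 510 fit the immediate encoding constraints.
  for (uint32_t a = 0; a < 1024; ++a) {
    uint32_t folded   = (a << 1) | 510u;
    uint32_t unfolded = (a | 255u) << 1;
    assert(folded == unfolded);
  }
}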
@@ -9982,6 +10187,10 @@ static SDValue PerformADDCombine(SDNode *N,
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Only works one way, because it needs an immediate operand.
+ if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
+ return Result;
+
// First try with the default operand order.
if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
return Result;
@@ -10121,7 +10330,6 @@ static SDValue PerformMULCombine(SDNode *N,
MVT::i32)));
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, MVT::i32), Res);
-
} else
return SDValue();
}
@@ -10171,6 +10379,9 @@ static SDValue PerformANDCombine(SDNode *N,
// fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
return Result;
+
+ if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
+ return Result;
}
return SDValue();
@@ -10237,95 +10448,17 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
return SDValue(OR, 0);
}
-/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
-static SDValue PerformORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- // Attempt to use immediate-form VORR
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SelectionDAG &DAG = DCI.DAG;
-
- if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (BVN && Subtarget->hasNEON() &&
- BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatBitSize <= 64) {
- EVT VorrVT;
- SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize,
- DAG, dl, VorrVT, VT.is128BitVector(),
- OtherModImm);
- if (Val.getNode()) {
- SDValue Input =
- DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
- SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
- }
- }
- }
-
- if (!Subtarget->isThumb1Only()) {
- // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
- if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
- return Result;
- if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
- return Result;
- }
-
- // The code below optimizes (or (and X, Y), Z).
- // The AND operand needs to have a single user to make these optimizations
- // profitable.
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
- return SDValue();
- SDValue N1 = N->getOperand(1);
-
- // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
- if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
- DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
-
- APInt SplatBits0, SplatBits1;
- BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
- // Ensure that the second operand of both ands are constants
- if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs) {
- if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs) {
- // Ensure that the bit width of the constants are the same and that
- // the splat arguments are logical inverses as per the pattern we
- // are trying to simplify.
- if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
- SplatBits0 == ~SplatBits1) {
- // Canonicalize the vector type to make instruction selection
- // simpler.
- EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
- SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
- N0->getOperand(1),
- N0->getOperand(0),
- N1->getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
- }
- }
- }
- }
-
- // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
- // reasonable.
-
+static SDValue PerformORCombineToBFI(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// BFI is only available on V6T2+
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
@@ -10367,9 +10500,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(Val, DL, MVT::i32),
DAG.getConstant(Mask, DL, MVT::i32));
- // Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
- return SDValue();
+ // Return value from the original node to inform the combiner that N is
+ // now dead.
+ return SDValue(N, 0);
}
} else if (N1.getOpcode() == ISD::AND) {
// case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
@@ -10393,9 +10527,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, DL, MVT::i32));
- // Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
- return SDValue();
+ // Return value from the original node to inform the combiner that N is
+ // now dead.
+ return SDValue(N, 0);
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
@@ -10409,9 +10544,10 @@ static SDValue PerformORCombine(SDNode *N,
DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
DAG.getConstant(Mask2, DL, MVT::i32));
- // Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
- return SDValue();
+ // Return value from the original node to inform the combiner that N is
+ // now dead.
+ return SDValue(N, 0);
}
}
@@ -10429,10 +10565,109 @@ static SDValue PerformORCombine(SDNode *N,
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
DAG.getConstant(~Mask, DL, MVT::i32));
- // Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
+ // Return value from the original node to inform the combiner that N is
+ // now dead.
+ return SDValue(N, 0);
+ }
+
+ return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Attempt to use immediate-form VORR
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN && Subtarget->hasNEON() &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatBitSize <= 64) {
+ EVT VorrVT;
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, dl, VorrVT, VT.is128BitVector(),
+ OtherModImm);
+ if (Val.getNode()) {
+ SDValue Input =
+ DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ }
+ }
}
+ if (!Subtarget->isThumb1Only()) {
+ // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
+ if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
+ return Result;
+ if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+ return Result;
+ }
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+
+ // The code below optimizes (or (and X, Y), Z).
+ // The AND operand needs to have a single user to make these optimizations
+ // profitable.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ APInt SplatBits0, SplatBits1;
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ // Ensure that the second operand of both ands are constants
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ // Ensure that the bit width of the constants are the same and that
+ // the splat arguments are logical inverses as per the pattern we
+ // are trying to simplify.
+ if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection
+ // simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1),
+ N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+ }
+
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
+ if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
+ return Res;
+ }
+
+ if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
+ return Result;
+
return SDValue();
}
@@ -10449,6 +10684,9 @@ static SDValue PerformXORCombine(SDNode *N,
// fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
+
+ if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
+ return Result;
}
return SDValue();
@@ -11781,6 +12019,14 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static const APInt *isPowerOf2Constant(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ if (!C)
+ return nullptr;
+ const APInt *CV = &C->getAPIntValue();
+ return CV->isPowerOf2() ? CV : nullptr;
+}
+
SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
// If we have a CMOV, OR and AND combination such as:
// if (x & CN)
@@ -11809,8 +12055,8 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue And = CmpZ->getOperand(0);
if (And->getOpcode() != ISD::AND)
return SDValue();
- ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
- if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+ const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
+ if (!AndC)
return SDValue();
SDValue X = And->getOperand(0);
@@ -11850,7 +12096,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue V = Y;
SDLoc dl(X);
EVT VT = X.getValueType();
- unsigned BitInX = AndC->getAPIntValue().logBase2();
+ unsigned BitInX = AndC->logBase2();
if (BitInX != 0) {
// We must shift X first.
@@ -12011,7 +12257,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
- case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
@@ -12171,11 +12417,11 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- const Function *F = MF.getFunction();
+ const Function &F = MF.getFunction();
// See if we can use NEON instructions for this...
if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
bool Fast;
if (Size >= 16 &&
(memOpAlign(SrcAlign, DstAlign, 16) ||
@@ -12193,6 +12439,26 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::Other;
}
+// 64-bit integers are split into their high and low parts and held in two
+// different registers, so the trunc is free since the low register can just
+// be used.
+bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
+ if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
+ return false;
+ unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
+ unsigned DestBits = DstTy->getPrimitiveSizeInBits();
+ return (SrcBits == 64 && DestBits == 32);
+}
+
+bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
+ if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
+ !DstVT.isInteger())
+ return false;
+ unsigned SrcBits = SrcVT.getSizeInBits();
+ unsigned DestBits = DstVT.getSizeInBits();
+ return (SrcBits == 64 && DestBits == 32);
+}
+
bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
if (Val.getOpcode() != ISD::LOAD)
return false;
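The new isTruncateFree overloads above encode the fact that on 32-bit ARM an i64 value occupies a register pair, so an i64 -> i32 truncation is simply a use of the low register. A minimal illustration (sketch only, not from the patch):

#include <cstdint>

// Truncating a 64-bit value held in a register pair: the compiler just keeps
// using the register that already holds the low 32 bits, no instruction needed.
uint32_t lowWord(uint64_t wide) { return static_cast<uint32_t>(wide); }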
@@ -12261,7 +12527,6 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -12377,8 +12642,13 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
Scale = Scale & ~1;
return Scale == 2 || Scale == 4 || Scale == 8;
case MVT::i64:
+ // FIXME: What are we trying to model here? ldrd doesn't have an r + r
+ // version in Thumb mode.
// r + r
- if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ if (Scale == 1)
+ return true;
+ // r * 2 (this can be lowered to r + r).
+ if (!AM.HasBaseReg && Scale == 2)
return true;
return false;
case MVT::isVoid:
@@ -12392,11 +12662,26 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
}
}
+bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
+ EVT VT) const {
+ const int Scale = AM.Scale;
+
+ // Negative scales are not supported in Thumb1.
+ if (Scale < 0)
+ return false;
+
+ // Thumb1 addressing modes do not support register scaling except in the
+ // following cases:
+ // 1. Scale == 1 means no scaling.
+ // 2. Scale == 2: this can be lowered to r + r if there is no base register.
+ return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
+}
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
@@ -12408,10 +12693,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
switch (AM.Scale) {
case 0: // no scale reg, must be "r+i" or "r", or "i".
break;
- case 1:
- if (Subtarget->isThumb1Only())
- return false;
- LLVM_FALLTHROUGH;
default:
// ARM doesn't support any R+R*scale+imm addr modes.
if (AM.BaseOffs)
@@ -12420,6 +12701,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (!VT.isSimple())
return false;
+ if (Subtarget->isThumb1Only())
+ return isLegalT1ScaledAddressingMode(AM, VT);
+
if (Subtarget->isThumb2())
return isLegalT2ScaledAddressingMode(AM, VT);
@@ -12436,8 +12720,11 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
return isPowerOf2_32(Scale & ~1);
case MVT::i16:
case MVT::i64:
- // r + r
- if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+ // r +/- r
+ if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
+ return true;
+ // r * 2 (this can be lowered to r + r).
+ if (!AM.HasBaseReg && Scale == 2)
return true;
return false;
@@ -12685,10 +12972,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case ARMISD::ADDE:
case ARMISD::SUBC:
case ARMISD::SUBE:
- // These nodes' second result is a boolean
- if (Op.getResNo() == 0)
- break;
- Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ // Special cases when we convert a carry to a boolean.
+ if (Op.getResNo() == 0) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // (ADDE 0, 0, C) will give us a single bit.
+ if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
+ isNullConstant(RHS)) {
+ Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ return;
+ }
+ }
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
@@ -12848,7 +13142,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
+
RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
@@ -12887,7 +13182,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::i32)
return RCPair(0U, &ARM::SPRRegClass);
break;
}
@@ -13293,6 +13588,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
+ MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
case Intrinsic::arm_neon_vld1:
@@ -13311,9 +13607,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
- Info.vol = false; // volatile loads with NEON intrinsics not supported
- Info.readMem = true;
- Info.writeMem = false;
+ // volatile loads with NEON intrinsics not supported
+ Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::arm_neon_vst1:
@@ -13338,9 +13633,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.offset = 0;
Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
- Info.vol = false; // volatile stores with NEON intrinsics not supported
- Info.readMem = false;
- Info.writeMem = true;
+ // volatile stores with NEON intrinsics not supported
+ Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::arm_ldaex:
@@ -13352,9 +13646,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::arm_stlex:
@@ -13366,9 +13658,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::arm_stlexd:
@@ -13378,9 +13668,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = 8;
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::arm_ldaexd:
@@ -13390,9 +13678,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 8;
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
default:
@@ -13414,7 +13700,7 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
+bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
@@ -13650,7 +13936,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
if (!Subtarget->isLittle())
- std::swap (Lo, Hi);
+ std::swap(Lo, Hi);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Strex, {Lo, Hi, Addr});
}
@@ -13772,7 +14058,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
-
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
@@ -13913,7 +14198,6 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Intrinsic::arm_neon_vst4};
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
-
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
@@ -14080,7 +14364,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
- assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);