summaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp1
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp19
-rw-r--r--lib/Target/ARM/ARMCallLowering.cpp2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp20
-rw-r--r--lib/Target/ARM/ARMISelLowering.h3
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td2
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp312
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp207
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.h33
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp32
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp18
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.h36
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp15
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h8
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp2
17 files changed, 596 insertions, 128 deletions
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 90f635c812542..582153daebde9 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
+ LLVM_FALLTHROUGH;
case ARM::STMDB_UPD:
case ARM::t2STMDB_UPD:
case ARM::VSTMDDB_UPD:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ec6b24b2ed67..3cf5950a1918d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1880,6 +1880,9 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
// Diamond: TBB is the block that is branched to, FBB is the fallthrough
TUnpredCycles = TCycles + TakenBranchCost;
FUnpredCycles = FCycles + NotTakenBranchCost;
+ // The branch at the end of FBB will disappear when it's predicated, so
+ // discount it from PredCost.
+ PredCost -= 1 * ScalingUpFactor;
}
// The total cost is the cost of each path scaled by their probabilites
unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b4fb292c0116d..e97a7ce5067f9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned R = 0; R < 16; ++R)
markSuperRegs(Reserved, ARM::D16 + R);
}
- const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
- for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
- for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+ const TargetRegisterClass &RC = ARM::GPRPairRegClass;
+ for (unsigned Reg : RC)
+ for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
+ if (Reserved.test(*SI))
+ markSuperRegs(Reserved, Reg);
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
@@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
- for (unsigned I = 0, E = Order.size(); I != E; ++I) {
- unsigned Reg = Order[I];
+ for (unsigned Reg : Order) {
if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
continue;
// Don't provide hints that are paired to a reserved register.
@@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
unsigned i = 0;
-
- while (!MI->getOperand(i).isFI()) {
- ++i;
- assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
- }
+ for (; !MI->getOperand(i).isFI(); ++i)
+ assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
// AddrMode4 and AddrMode6 cannot handle any offset.
if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index e498f70b820db..051827a6a6a2f 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -321,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- // The necesary extensions are handled on the other side of the ABI
+ // The necessary extensions are handled on the other side of the ABI
// boundary.
markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e42514acd76f0..6ba7593543a92 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3398,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc dl(Op);
- ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
- auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
- if (Scope == SynchronizationScope::SingleThread)
+ ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
+ auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
+ if (SSID == SyncScope::SingleThread)
return Op;
if (!Subtarget->hasDataBarrier()) {
@@ -5356,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
- case ISD::SETNE: Invert = true;
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
- case ISD::SETLT: Swap = true;
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
- case ISD::SETLE: Swap = true;
+ case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
}
@@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SV->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
SubVecs[SV].push_back(SubVec);
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5044134f5b1e2..f05b142552369 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -510,7 +510,8 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
- bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override {
+ bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+ const SelectionDAG &DAG) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 53db5acbe805c..42eac12e457b2 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4799,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm",
// Pseudo instruction ldr Rt, =immediate
def t2LDRConstPool
: t2AsmPseudo<"ldr${p} $Rt, $immediate",
- (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+ (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
(t2LDRConstPool GPRnopc:$Rt,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 374176d1d7371..29ef69ad0010f 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -20,6 +20,8 @@
#define DEBUG_TYPE "arm-isel"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -42,13 +44,32 @@ public:
private:
bool selectImpl(MachineInstr &I) const;
- bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const;
+ struct CmpConstants;
+ struct InsertInfo;
+
+ bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const;
+
+ // Helper for inserting a comparison sequence that sets \p ResReg to either 1
+ // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or
+ // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS).
+ bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg,
+ ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const;
+
+ // Set \p DestReg to \p Constant.
+ void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const;
+
+ bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const;
+
+ // Check if the types match and both operands have the expected size and
+ // register bank.
+ bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS,
+ unsigned ExpectedSize, unsigned ExpectedRegBankID) const;
- bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const;
+ // Check if the register has the expected size and register bank.
+ bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const;
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -251,120 +272,233 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
return Opc;
}
-static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+// When lowering comparisons, we sometimes need to perform two compares instead
+// of just one. Get the condition codes for both comparisons. If only one is
+// needed, the second member of the pair is ARMCC::AL.
+static std::pair<ARMCC::CondCodes, ARMCC::CondCodes>
+getComparePreds(CmpInst::Predicate Pred) {
+ std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL};
switch (Pred) {
- // Needs two compares...
case CmpInst::FCMP_ONE:
+ Preds = {ARMCC::GT, ARMCC::MI};
+ break;
case CmpInst::FCMP_UEQ:
- default:
- // AL is our "false" for now. The other two need more compares.
- return ARMCC::AL;
+ Preds = {ARMCC::EQ, ARMCC::VS};
+ break;
case CmpInst::ICMP_EQ:
case CmpInst::FCMP_OEQ:
- return ARMCC::EQ;
+ Preds.first = ARMCC::EQ;
+ break;
case CmpInst::ICMP_SGT:
case CmpInst::FCMP_OGT:
- return ARMCC::GT;
+ Preds.first = ARMCC::GT;
+ break;
case CmpInst::ICMP_SGE:
case CmpInst::FCMP_OGE:
- return ARMCC::GE;
+ Preds.first = ARMCC::GE;
+ break;
case CmpInst::ICMP_UGT:
case CmpInst::FCMP_UGT:
- return ARMCC::HI;
+ Preds.first = ARMCC::HI;
+ break;
case CmpInst::FCMP_OLT:
- return ARMCC::MI;
+ Preds.first = ARMCC::MI;
+ break;
case CmpInst::ICMP_ULE:
case CmpInst::FCMP_OLE:
- return ARMCC::LS;
+ Preds.first = ARMCC::LS;
+ break;
case CmpInst::FCMP_ORD:
- return ARMCC::VC;
+ Preds.first = ARMCC::VC;
+ break;
case CmpInst::FCMP_UNO:
- return ARMCC::VS;
+ Preds.first = ARMCC::VS;
+ break;
case CmpInst::FCMP_UGE:
- return ARMCC::PL;
+ Preds.first = ARMCC::PL;
+ break;
case CmpInst::ICMP_SLT:
case CmpInst::FCMP_ULT:
- return ARMCC::LT;
+ Preds.first = ARMCC::LT;
+ break;
case CmpInst::ICMP_SLE:
case CmpInst::FCMP_ULE:
- return ARMCC::LE;
+ Preds.first = ARMCC::LE;
+ break;
case CmpInst::FCMP_UNE:
case CmpInst::ICMP_NE:
- return ARMCC::NE;
+ Preds.first = ARMCC::NE;
+ break;
case CmpInst::ICMP_UGE:
- return ARMCC::HS;
+ Preds.first = ARMCC::HS;
+ break;
case CmpInst::ICMP_ULT:
- return ARMCC::LO;
+ Preds.first = ARMCC::LO;
+ break;
+ default:
+ break;
}
+ assert(Preds.first != ARMCC::AL && "No comparisons needed?");
+ return Preds;
}
-bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
- const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
- auto &MBB = *MIB->getParent();
- auto InsertBefore = std::next(MIB->getIterator());
- auto &DebugLoc = MIB->getDebugLoc();
-
- // Move 0 into the result register.
- auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi))
- .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass))
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
- if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI))
+struct ARMInstructionSelector::CmpConstants {
+ CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank,
+ unsigned OpSize)
+ : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode),
+ OperandRegBankID(OpRegBank), OperandSize(OpSize) {}
+
+ // The opcode used for performing the comparison.
+ const unsigned ComparisonOpcode;
+
+ // The opcode used for reading the flags set by the comparison. May be
+ // ARM::INSTRUCTION_LIST_END if we don't need to read the flags.
+ const unsigned ReadFlagsOpcode;
+
+ // The assumed register bank ID for the operands.
+ const unsigned OperandRegBankID;
+
+ // The assumed size in bits for the operands.
+ const unsigned OperandSize;
+};
+
+struct ARMInstructionSelector::InsertInfo {
+ InsertInfo(MachineInstrBuilder &MIB)
+ : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())),
+ DbgLoc(MIB->getDebugLoc()) {}
+
+ MachineBasicBlock &MBB;
+ const MachineBasicBlock::instr_iterator InsertBefore;
+ const DebugLoc &DbgLoc;
+};
+
+void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg,
+ unsigned Constant) const {
+ (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi))
+ .addDef(DestReg)
+ .addImm(Constant)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+}
+
+bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ return MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
+ validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) &&
+ validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID);
+}
+
+bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg,
+ unsigned ExpectedSize,
+ unsigned ExpectedRegBankID) const {
+ if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) {
+ DEBUG(dbgs() << "Unexpected size for register");
return false;
+ }
- // Perform the comparison.
- auto LHSReg = MIB->getOperand(2).getReg();
- auto RHSReg = MIB->getOperand(3).getReg();
- assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
- MRI.getType(LHSReg).getSizeInBits() == 32 &&
- MRI.getType(RHSReg).getSizeInBits() == 32 &&
- "Unsupported types for comparison operation");
- auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr))
- .addUse(LHSReg)
- .addUse(RHSReg)
- .add(predOps(ARMCC::AL));
- if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) {
+ DEBUG(dbgs() << "Unexpected register bank for register");
return false;
+ }
+
+ return true;
+}
+
+bool ARMInstructionSelector::selectCmp(CmpConstants Helper,
+ MachineInstrBuilder &MIB,
+ MachineRegisterInfo &MRI) const {
+ const InsertInfo I(MIB);
- // Move 1 into the result register if the flags say so.
auto ResReg = MIB->getOperand(0).getReg();
+ if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID))
+ return false;
+
auto Cond =
static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate());
- auto ARMCond = getComparePred(Cond);
- if (ARMCond == ARMCC::AL)
+ if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) {
+ putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0);
+ MIB->eraseFromParent();
+ return true;
+ }
+
+ auto LHSReg = MIB->getOperand(2).getReg();
+ auto RHSReg = MIB->getOperand(3).getReg();
+ if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize,
+ Helper.OperandRegBankID))
return false;
- auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi))
+ auto ARMConds = getComparePreds(Cond);
+ auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ putConstant(I, ZeroReg, 0);
+
+ if (ARMConds.second == ARMCC::AL) {
+ // Simple case, we only need one comparison and we're done.
+ if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg,
+ ZeroReg))
+ return false;
+ } else {
+ // Not so simple, we need two successive comparisons.
+ auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg,
+ RHSReg, ZeroReg))
+ return false;
+ if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg,
+ IntermediateRes))
+ return false;
+ }
+
+ MIB->eraseFromParent();
+ return true;
+}
+
+bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I,
+ unsigned ResReg,
+ ARMCC::CondCodes Cond,
+ unsigned LHSReg, unsigned RHSReg,
+ unsigned PrevRes) const {
+ // Perform the comparison.
+ auto CmpI =
+ BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode))
+ .addUse(LHSReg)
+ .addUse(RHSReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Read the comparison flags (if necessary).
+ if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) {
+ auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc,
+ TII.get(Helper.ReadFlagsOpcode))
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI))
+ return false;
+ }
+
+ // Select either 1 or the previous result based on the value of the flags.
+ auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi))
.addDef(ResReg)
- .addUse(Mov0I->getOperand(0).getReg())
+ .addUse(PrevRes)
.addImm(1)
- .add(predOps(ARMCond, ARM::CPSR));
+ .add(predOps(Cond, ARM::CPSR));
if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
return false;
- MIB->eraseFromParent();
return true;
}
bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
- const ARMBaseInstrInfo &TII,
- MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI) const {
+ MachineRegisterInfo &MRI) const {
auto &MBB = *MIB->getParent();
auto InsertBefore = std::next(MIB->getIterator());
- auto &DebugLoc = MIB->getDebugLoc();
+ auto &DbgLoc = MIB->getDebugLoc();
// Compare the condition to 0.
auto CondReg = MIB->getOperand(1).getReg();
- assert(MRI.getType(CondReg).getSizeInBits() == 1 &&
- RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri))
+ auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri))
.addUse(CondReg)
.addImm(0)
.add(predOps(ARMCC::AL));
@@ -376,13 +510,10 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB,
auto ResReg = MIB->getOperand(0).getReg();
auto TrueReg = MIB->getOperand(2).getReg();
auto FalseReg = MIB->getOperand(3).getReg();
- assert(MRI.getType(ResReg) == MRI.getType(TrueReg) &&
- MRI.getType(TrueReg) == MRI.getType(FalseReg) &&
- MRI.getType(FalseReg).getSizeInBits() == 32 &&
- RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) &&
+ validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) &&
"Unsupported types for select operation");
- auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr))
+ auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr))
.addDef(ResReg)
.addUse(TrueReg)
.addUse(FalseReg)
@@ -494,10 +625,32 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
- case G_ICMP:
- return selectICmp(MIB, TII, MRI, TRI, RBI);
case G_SELECT:
- return selectSelect(MIB, TII, MRI, TRI, RBI);
+ return selectSelect(MIB, MRI);
+ case G_ICMP: {
+ CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END,
+ ARM::GPRRegBankID, 32);
+ return selectCmp(Helper, MIB, MRI);
+ }
+ case G_FCMP: {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP");
+
+ unsigned OpReg = I.getOperand(2).getReg();
+ unsigned Size = MRI.getType(OpReg).getSizeInBits();
+
+ if (Size == 64 && TII.getSubtarget().isFPOnlySP()) {
+ DEBUG(dbgs() << "Subtarget only supports single precision");
+ return false;
+ }
+ if (Size != 32 && Size != 64) {
+ DEBUG(dbgs() << "Unsupported size for G_FCMP operand");
+ return false;
+ }
+
+ CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT,
+ ARM::FPRRegBankID, Size);
+ return selectCmp(Helper, MIB, MRI);
+ }
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
@@ -510,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
break;
case G_CONSTANT: {
unsigned Reg = I.getOperand(0).getReg();
- if (MRI.getType(Reg).getSizeInBits() != 32)
+
+ if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID))
return false;
- assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
- "Expected constant to live in a GPR");
I.setDesc(TII.get(ARM::MOVi));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index f3e62d09cc30a..f23e62595d2e9 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -28,6 +28,10 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
+static bool AEABI(const ARMSubtarget &ST) {
+ return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI();
+}
+
ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
using namespace TargetOpcode;
@@ -66,8 +70,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (unsigned Op : {G_SREM, G_UREM})
if (ST.hasDivideInARMMode())
setAction({Op, s32}, Lower);
- else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() ||
- ST.isTargetMuslAEABI())
+ else if (AEABI(ST))
setAction({Op, s32}, Custom);
else
setAction({Op, s32}, Libcall);
@@ -86,6 +89,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_SELECT, 1, s1}, Legal);
setAction({G_CONSTANT, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_CONSTANT, Ty}, WidenScalar);
setAction({G_ICMP, s1}, Legal);
for (auto Ty : {s8, s16})
@@ -99,9 +104,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_LOAD, s64}, Legal);
setAction({G_STORE, s64}, Legal);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Legal);
+ setAction({G_FCMP, 1, s64}, Legal);
} else {
for (auto Ty : {s32, s64})
setAction({G_FADD, Ty}, Libcall);
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Custom);
+ setAction({G_FCMP, 1, s64}, Custom);
+
+ if (AEABI(ST))
+ setFCmpLibcallsAEABI();
+ else
+ setFCmpLibcallsGNU();
}
for (unsigned Op : {G_FREM, G_FPOW})
@@ -111,11 +129,120 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
computeTables();
}
+void ARMLegalizerInfo::setFCmpLibcallsAEABI() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {
+ {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {
+ {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {
+ {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {
+ {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE},
+ {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}};
+}
+
+void ARMLegalizerInfo::setFCmpLibcallsGNU() {
+ // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be
+ // default-initialized.
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}};
+ FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}};
+ FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}};
+ FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}};
+ FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}};
+ FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F32, CmpInst::ICMP_SLT}};
+ FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F32, CmpInst::ICMP_NE}};
+
+ FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1);
+ FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}};
+ FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}};
+ FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}};
+ FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}};
+ FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}};
+ FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT},
+ {RTLIB::OLT_F64, CmpInst::ICMP_SLT}};
+ FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ},
+ {RTLIB::UO_F64, CmpInst::ICMP_NE}};
+}
+
+ARMLegalizerInfo::FCmpLibcallsList
+ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate,
+ unsigned Size) const {
+ assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate");
+ if (Size == 32)
+ return FCmp32Libcalls[Predicate];
+ if (Size == 64)
+ return FCmp64Libcalls[Predicate];
+ llvm_unreachable("Unsupported size for FCmp predicate");
+}
+
bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
using namespace TargetOpcode;
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return false;
@@ -137,9 +264,9 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
auto RetVal = MRI.createGenericVirtualRegister(
getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout()));
- auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy},
- {{MI.getOperand(1).getReg(), ArgTy},
- {MI.getOperand(2).getReg(), ArgTy}});
+ auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy},
+ {{MI.getOperand(1).getReg(), ArgTy},
+ {MI.getOperand(2).getReg(), ArgTy}});
if (Status != LegalizerHelper::Legalized)
return false;
@@ -149,8 +276,76 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
MIRBuilder.buildUnmerge(
{MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult},
RetVal);
+ break;
+ }
+ case G_FCMP: {
+ assert(MRI.getType(MI.getOperand(2).getReg()) ==
+ MRI.getType(MI.getOperand(3).getReg()) &&
+ "Mismatched operands for G_FCMP");
+ auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+
+ auto OriginalResult = MI.getOperand(0).getReg();
+ auto Predicate =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ auto Libcalls = getFCmpLibcalls(Predicate, OpSize);
+
+ if (Libcalls.empty()) {
+ assert((Predicate == CmpInst::FCMP_TRUE ||
+ Predicate == CmpInst::FCMP_FALSE) &&
+ "Predicate needs libcalls, but none specified");
+ MIRBuilder.buildConstant(OriginalResult,
+ Predicate == CmpInst::FCMP_TRUE ? 1 : 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size");
+ auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx);
+ auto *RetTy = Type::getInt32Ty(Ctx);
+
+ SmallVector<unsigned, 2> Results;
+ for (auto Libcall : Libcalls) {
+ auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ auto Status =
+ createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy},
+ {{MI.getOperand(2).getReg(), ArgTy},
+ {MI.getOperand(3).getReg(), ArgTy}});
+
+ if (Status != LegalizerHelper::Legalized)
+ return false;
- return LegalizerHelper::Legalized;
+ auto ProcessedResult =
+ Libcalls.size() == 1
+ ? OriginalResult
+ : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult));
+
+ // We have a result, but we need to transform it into a proper 1-bit 0 or
+ // 1, taking into account the different peculiarities of the values
+ // returned by the comparison functions.
+ CmpInst::Predicate ResultPred = Libcall.Predicate;
+ if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) {
+ // We have a nice 0 or 1, and we just need to truncate it back to 1 bit
+ // to keep the types consistent.
+ MIRBuilder.buildTrunc(ProcessedResult, LibcallResult);
+ } else {
+ // We need to compare against 0.
+ assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate");
+ auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero);
+ }
+ Results.push_back(ProcessedResult);
+ }
+
+ if (Results.size() != 1) {
+ assert(Results.size() == 2 && "Unexpected number of results");
+ MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]);
+ }
+ break;
}
}
+
+ MI.eraseFromParent();
+ return true;
}
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index a9bdd367737e5..78ab9412c04ba 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -14,7 +14,10 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -27,6 +30,36 @@ public:
bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ void setFCmpLibcallsGNU();
+ void setFCmpLibcallsAEABI();
+
+ struct FCmpLibcallInfo { // Describes one libcall used to lower an FCmp.
+ // The runtime library routine to call.
+ RTLIB::Libcall LibcallID;
+
+ // Integer predicate for comparing the value returned by the libcall
+ // against zero (the only constant currently supported). This is needed
+ // because libcalls encode their answer in different ways: e.g. a value
+ // greater than 0 for 'true' and anything negative for 'false', or 0 for
+ // 'true' and non-zero for 'false'. If the libcall already returns 0 or 1
+ // and no comparison is needed, use CmpInst::BAD_ICMP_PREDICATE.
+ CmpInst::Predicate Predicate;
+ };
+ using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>;
+
+ // Map from each FCmp predicate to the corresponding libcall infos. A FCmp
+ // instruction may be lowered to one or two libcalls, which is why we need a
+ // list. If two libcalls are needed, their results will be OR'ed.
+ using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>;
+
+ FCmpLibcallsMapTy FCmp32Libcalls;
+ FCmpLibcallsMapTy FCmp64Libcalls;
+
+ // Get the libcall(s) corresponding to \p Predicate for operands of \p Size
+ // bits.
+ FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 11fb81a4f9fea..c0c09e8c15afd 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-
unsigned NumOperands = MI.getNumOperands();
const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
@@ -236,26 +234,31 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
case G_LOAD:
- case G_STORE:
+ case G_STORE: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
OperandsMapping =
Ty.getSizeInBits() == 64
? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
&ARM::ValueMappings[ARM::GPR3OpsIdx]})
: &ARM::ValueMappings[ARM::GPR3OpsIdx];
break;
- case G_FADD:
+ }
+ case G_FADD: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
"Unsupported size for G_FADD");
OperandsMapping = Ty.getSizeInBits() == 64
? &ARM::ValueMappings[ARM::DPR3OpsIdx]
: &ARM::ValueMappings[ARM::SPR3OpsIdx];
break;
+ }
case G_CONSTANT:
case G_FRAME_INDEX:
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
case G_SELECT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(1).getReg());
(void)Ty2;
assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT");
@@ -277,9 +280,29 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&ARM::ValueMappings[ARM::GPR3OpsIdx]});
break;
}
+  case G_FCMP: {
+    // Map a floating point compare: operand 0 is the 1-bit result, operands 2
+    // and 3 are the equally sized floating point inputs. Operand 1 receives no
+    // register bank mapping.
+    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+    LLT Ty1 = MRI.getType(MI.getOperand(2).getReg());
+    LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+    // Ty and Ty2 are only read by the asserts below; the void casts keep
+    // builds without assertions free of unused-variable warnings.
+    (void)Ty;
+    (void)Ty2;
+    assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP");
+    assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() &&
+           "Mismatched operand sizes for G_FCMP");
+
+    unsigned Size = Ty1.getSizeInBits();
+    assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP");
+
+    // The result goes to a GPR; the inputs use the SPR mapping when they are
+    // 32 bits wide and the DPR mapping when they are 64 bits wide.
+    auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx]
+                                      : &ARM::ValueMappings[ARM::DPR3OpsIdx];
+    OperandsMapping =
+        getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+                            FPRValueMapping, FPRValueMapping});
+    break;
+  }
case G_MERGE_VALUES: {
// We only support G_MERGE_VALUES for creating a double precision floating
// point value out of two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
@@ -294,6 +317,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_UNMERGE_VALUES: {
// We only support G_UNMERGE_VALUES for splitting a double precision
// floating point value into two GPRs.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 ||
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 8eb9dbf5f9de6..51b0fedd2b54f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,6 +15,24 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
+                                     const Function *Callee) const {
+  // Fetch the subtarget feature sets of both functions.
+  const TargetMachine &Machine = getTLI()->getTargetMachine();
+  const FeatureBitset &CallerFeatures =
+      Machine.getSubtargetImpl(*Caller)->getFeatureBits();
+  const FeatureBitset &CalleeFeatures =
+      Machine.getSubtargetImpl(*Callee)->getFeatureBits();
+
+  // Any feature outside the whitelist has to be identical on both sides.
+  if (!((CallerFeatures & ~InlineFeatureWhitelist) ==
+        (CalleeFeatures & ~InlineFeatureWhitelist)))
+    return false;
+
+  // Whitelisted features are less strict: it is enough that every such
+  // feature enabled for the callee is also enabled for the caller.
+  const auto CalleeWhitelisted = CalleeFeatures & InlineFeatureWhitelist;
+  return (CallerFeatures & CalleeWhitelisted) == CalleeWhitelisted;
+}
+
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 8a1a378638779..0695a4e633467 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
+ // Features a callee may lack relative to its caller and still be inlined
+ // (see areInlineCompatible); all other features must match exactly. Currently
+ // excluded: ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP and
+ // FeatureD16, because they change which instructions/registers are available.
+ // For example, inlining a callee with -thumb-mode into a caller with
+ // +thumb-mode may make the assembler fail on ARM-only code, e.g. inline asm.
+ const FeatureBitset InlineFeatureWhitelist = {
+ ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
+ ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
+ ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
+ ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
+ ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
+ ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
+ ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
+ ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
+ ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
+ ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
+ ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
+ ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
+ ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
+ ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
+ ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates
+ };
+
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -41,6 +74,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 891b5c60e1fd6..1129826f21f64 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5249,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Fall through for the Identifier case that is not a register or a
// special name.
+ LLVM_FALLTHROUGH;
}
case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as a branch targets
@@ -8992,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
return PlainMatchResult;
}
+std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS);
+
static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
@@ -9085,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(ErrorLoc, "invalid operand for instruction");
}
- case Match_MnemonicFail:
- return Error(IDLoc, "invalid instruction",
+ case Match_MnemonicFail: {
+ uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+ std::string Suggestion = ARMMnemonicSpellCheck(
+ ((ARMOperand &)*Operands[0]).getToken(), FBS);
+ return Error(IDLoc, "invalid instruction" + Suggestion,
((ARMOperand &)*Operands[0]).getLocRange());
+ }
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 22de728fe06e1..a77df7a2598f4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -361,9 +361,8 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target, uint64_t Value,
- bool IsPCRel, MCContext &Ctx,
- bool IsLittleEndian,
- bool IsResolved) const {
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const {
unsigned Kind = Fixup.getKind();
// MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT
@@ -392,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case FK_SecRel_4:
return Value;
case ARM::fixup_arm_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
@@ -404,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return Value;
}
case ARM::fixup_t2_movt_hi16:
- if (!IsPCRel)
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
@@ -885,11 +884,11 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
- bool IsPCRel) const {
+ bool IsResolved) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
MCContext &Ctx = Asm.getContext();
- Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx,
- IsLittleEndian, true);
+ Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx,
+ IsLittleEndian);
if (!Value)
return; // Doesn't change encoding.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 84b54bbb9a49b..02374966dafe7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -42,13 +42,13 @@ public:
const MCValue &Target) override;
unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, uint64_t Value, bool IsPCRel,
- MCContext &Ctx, bool IsLittleEndian,
- bool IsResolved) const;
+ const MCValue &Target, uint64_t Value,
+ bool IsResolved, MCContext &Ctx,
+ bool IsLittleEndian) const;
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsResolved) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 0b6574c37de12..5709b4e617987 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R12:
if (STI.splitFramePushPop(MF))
break;
- // fallthough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2: