summaryrefslogtreecommitdiff
path: root/lib/Target/ARM
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--lib/Target/ARM/ARM.td3
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp3
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp54
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td10
-rw-r--r--lib/Target/ARM/ARMInstructionSelector.cpp105
-rw-r--r--lib/Target/ARM/ARMLegalizerInfo.cpp6
-rw-r--r--lib/Target/ARM/ARMMacroFusion.cpp57
-rw-r--r--lib/Target/ARM/ARMMacroFusion.h24
-rw-r--r--lib/Target/ARM/ARMRegisterBankInfo.cpp10
-rw-r--r--lib/Target/ARM/ARMSubtarget.h8
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp66
-rw-r--r--lib/Target/ARM/CMakeLists.txt1
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp63
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h14
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp7
18 files changed, 357 insertions, 91 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ca68f5d42c32c..6f67183df6a18 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -100,7 +100,8 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability and Serviceability extensions">;
def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
"Enable fast computation of positive address offsets">;
-
+def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
// Cyclone has preferred instructions for zeroing VFP registers, which can
// execute in 0 cycles.
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index f9da036c7e468..90f635c812542 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1504,6 +1504,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case ARM::CONSTPOOL_ENTRY: {
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
+
/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 8715657ad5e25..e0810c358f2da 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -665,12 +665,14 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
+ // In their ARM encoding, they can't be encoded in a conditional form.
+ if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
+
if (AFI->isThumb2Function()) {
if (getSubtarget().restrictIT())
return isV8EligibleForIT(&MI);
- } else { // non-Thumb
- if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
- return false;
}
return true;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5b2d093e8f0da..2bcc707e9fc3c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2669,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
-static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
// FIXME there is no actual debug info here
SDLoc dl(Op);
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
SDValue Res;
+
+ // When generating execute-only code Constant Pools must be promoted to the
+ // global data section. It's a bit ugly that we can't share them across basic
+ // blocks, but this way we guarantee that execute-only behaves correct with
+ // position-independent addressing modes.
+ if (Subtarget->genExecuteOnly()) {
+ auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+ auto T = const_cast<Type*>(CP->getType());
+ auto C = const_cast<Constant*>(CP->getConstVal());
+ auto M = const_cast<Module*>(DAG.getMachineFunction().
+ getFunction()->getParent());
+ auto GV = new GlobalVariable(
+ *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
+ Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
+ Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
+ Twine(AFI->createPICLabelUId())
+ );
+ SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
+ dl, PtrVT);
+ return LowerGlobalAddress(GA, DAG);
+ }
+
if (CP->isMachineConstantPoolEntry())
Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
CP->getAlignment());
@@ -3118,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) {
isa<Function>(GV);
}
+SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -7634,21 +7670,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
- case ISD::ConstantPool:
- if (Subtarget->genExecuteOnly())
- llvm_unreachable("execute-only should not generate constant pools");
- return LowerConstantPool(Op, DAG);
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::GlobalAddress:
- switch (Subtarget->getTargetTriple().getObjectFormat()) {
- default: llvm_unreachable("unknown object format");
- case Triple::COFF:
- return LowerGlobalAddressWindows(Op, DAG);
- case Triple::ELF:
- return LowerGlobalAddressELF(Op, DAG);
- case Triple::MachO:
- return LowerGlobalAddressDarwin(Op, DAG);
- }
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 26da528c19e6d..5044134f5b1e2 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -601,6 +601,8 @@ class InstrItineraryData;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 45471a4e95b39..53db5acbe805c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -4756,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
(ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+// Aliases for the above with the .w qualifier
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def : t2InstAlias<"mov${p}.w $Rd, $shift",
+ (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def : t2InstAlias<"movs${p}.w $Rd, $shift",
+ (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
// ADR w/o the .w suffix
def : t2InstAlias<"adr${p} $Rd, $addr",
(t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 2ae3bad4076b0..4cb0eca5ee5f8 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -42,6 +42,10 @@ public:
private:
bool selectImpl(MachineInstr &I) const;
+ bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const;
+
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
const ARMBaseTargetMachine &TM;
@@ -243,6 +247,105 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
return Opc;
}
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ // AL is our "false" for now. The other two need more compares.
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ auto &MBB = *MIB->getParent();
+ auto InsertBefore = std::next(MIB->getIterator());
+ auto &DebugLoc = MIB->getDebugLoc();
+
+ // Move 0 into the result register.
+ auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi))
+ .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass))
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI))
+ return false;
+
+ // Perform the comparison.
+ auto LHSReg = MIB->getOperand(2).getReg();
+ auto RHSReg = MIB->getOperand(3).getReg();
+ assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) &&
+ MRI.getType(LHSReg).getSizeInBits() == 32 &&
+ MRI.getType(RHSReg).getSizeInBits() == 32 &&
+ "Unsupported types for comparison operation");
+ auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr))
+ .addUse(LHSReg)
+ .addUse(RHSReg)
+ .add(predOps(ARMCC::AL));
+ if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI))
+ return false;
+
+ // Move 1 into the result register if the flags say so.
+ auto ResReg = MIB->getOperand(0).getReg();
+ auto Cond =
+ static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate());
+ auto ARMCond = getComparePred(Cond);
+ if (ARMCond == ARMCC::AL)
+ return false;
+
+ auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi))
+ .addDef(ResReg)
+ .addUse(Mov0I->getOperand(0).getReg())
+ .addImm(1)
+ .add(predOps(ARMCond, ARM::CPSR));
+ if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI))
+ return false;
+
+ MIB->eraseFromParent();
+ return true;
+}
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -343,6 +446,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(COPY));
return selectCopy(I, TII, MRI, TRI, RBI);
}
+ case G_ICMP:
+ return selectICmp(MIB, TII, MRI, TRI, RBI);
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index a706079d98662..5873c7fb38729 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -86,6 +86,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
setAction({G_CONSTANT, s32}, Legal);
+ setAction({G_ICMP, s1}, Legal);
+ for (auto Ty : {s8, s16})
+ setAction({G_ICMP, 1, Ty}, WidenScalar);
+ for (auto Ty : {s32, p0})
+ setAction({G_ICMP, 1, Ty}, Legal);
+
if (!ST.useSoftFloat() && ST.hasVFP2()) {
setAction({G_FADD, s32}, Legal);
setAction({G_FADD, s64}, Legal);
diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp
new file mode 100644
index 0000000000000..1b6e97c28d453
--- /dev/null
+++ b/lib/Target/ARM/ARMMacroFusion.cpp
@@ -0,0 +1,57 @@
+//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM implementation of the DAG scheduling
+/// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMacroFusion.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/MacroFusion.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch(SecondOpcode) {
+ // AES encode.
+ case ARM::AESMC :
+ return FirstOpcode == ARM::AESE ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ // AES decode.
+ case ARM::AESIMC:
+ return FirstOpcode == ARM::AESD ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ }
+
+ return false;
+}
+
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () {
+ return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h
new file mode 100644
index 0000000000000..1e4fc6687eae8
--- /dev/null
+++ b/lib/Target/ARM/ARMMacroFusion.h
@@ -0,0 +1,24 @@
+//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file contains the ARM definition of the DAG scheduling mutation
+/// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createARMMacroFusionDAGMutation());
+/// to ARMPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation();
+
+} // llvm
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index f59b075e6dd9a..2350d0c6ef69e 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -255,6 +255,16 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping =
getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_ICMP: {
+ LLT Ty2 = MRI.getType(MI.getOperand(2).getReg());
+ (void)Ty2;
+ assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP");
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ &ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]});
+ break;
+ }
case G_MERGE_VALUES: {
// We only support G_MERGE_VALUES for creating a double precision floating
// point value out of two GPRs.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index af682dd8321cf..d890d0fa777e8 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -285,6 +285,10 @@ protected:
/// HasFPAO - if true, processor does positive address offset computation faster
bool HasFPAO = false;
+ /// HasFuseAES - if true, processor executes back to back AES instruction
+ /// pairs faster.
+ bool HasFuseAES = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -561,6 +565,10 @@ public:
bool hasD16() const { return HasD16; }
bool hasFullFP16() const { return HasFullFP16; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ /// \brief Return true if the CPU supports any kind of instruction fusion.
+ bool hasFusion() const { return hasFuseAES(); }
+
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index c0506cfda6129..eb71e557ec917 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -17,6 +17,7 @@
#include "ARMRegisterBankInfo.h"
#endif
#include "ARMSubtarget.h"
+#include "ARMMacroFusion.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
@@ -394,6 +395,9 @@ public:
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
// add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
return DAG;
}
@@ -401,6 +405,9 @@ public:
createPostMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
// add DAG Mutations here.
+ const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>();
+ if (ST.hasFusion())
+ DAG->addMutation(createARMMacroFusionDAGMutation());
return DAG;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 19fba3033bb2b..891b5c60e1fd6 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6860,6 +6860,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
bool ARMAsmParser::processInstruction(MCInst &Inst,
const OperandVector &Operands,
MCStreamer &Out) {
+ // Check if we have the wide qualifier, because if it's present we
+ // must avoid selecting a 16-bit thumb instruction.
+ bool HasWideQualifier = false;
+ for (auto &Op : Operands) {
+ ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op);
+ if (ARMOp.isToken() && ARMOp.getToken() == ".w") {
+ HasWideQualifier = true;
+ break;
+ }
+ }
+
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6939,8 +6950,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
Inst.getOperand(1).getImm() <= 0xff &&
- !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
+ !HasWideQualifier)
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -6971,10 +6981,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
else if (Inst.getOpcode() == ARM::t2LDRConstPool)
TmpInst.setOpcode(ARM::t2LDRpci);
const ARMOperand &PoolOperand =
- (static_cast<ARMOperand &>(*Operands[2]).isToken() &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ?
- static_cast<ARMOperand &>(*Operands[4]) :
- static_cast<ARMOperand &>(*Operands[3]);
+ (HasWideQualifier ?
+ static_cast<ARMOperand &>(*Operands[4]) :
+ static_cast<ARMOperand &>(*Operands[3]));
const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
// If SubExprVal is a constant we may be able to use a MOV
if (isa<MCConstantExpr>(SubExprVal) &&
@@ -8117,8 +8126,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
- !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8152,7 +8160,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg()) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
@@ -8186,7 +8195,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
bool isNarrow = false;
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
- inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) &&
+ !HasWideQualifier)
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
@@ -8415,10 +8425,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
(Inst.getOperand(2).isImm() &&
(unsigned)Inst.getOperand(2).getImm() > 255) ||
- ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) ||
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -8447,8 +8455,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
if (!Transform ||
Inst.getOperand(5).getReg() != 0 ||
- (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
+ HasWideQualifier)
break;
MCInst TmpInst;
TmpInst.setOpcode(ARM::tADDhirr);
@@ -8568,11 +8575,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
(Inst.getOperand(1).isImm() &&
(unsigned)Inst.getOperand(1).getImm() <= 255) &&
- ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
- Inst.getOperand(4).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
// The operands aren't in the same order for tMOVi8...
MCInst TmpInst;
TmpInst.setOpcode(ARM::tMOVi8);
@@ -8593,8 +8597,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -8616,8 +8619,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == 0 &&
- (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
- static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Illegal opcode!");
@@ -8716,11 +8718,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
if ((isARMLowRegister(Inst.getOperand(1).getReg()) &&
isARMLowRegister(Inst.getOperand(2).getReg())) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8756,11 +8755,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(2).getReg())) &&
(Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
- ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
- !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
- ".w"))) {
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !HasWideQualifier) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 3cde43967568b..cf6827fd6ca19 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -49,6 +49,7 @@ add_llvm_target(ARMCodeGen
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
ARMMachineFunctionInfo.cpp
+ ARMMacroFusion.cpp
ARMRegisterInfo.cpp
ARMOptimizeBarriersPass.cpp
ARMSelectionDAGInfo.cpp
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 716492ea25662..81760f03940ad 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -358,11 +358,27 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value,
bool IsPCRel, MCContext &Ctx,
bool IsLittleEndian,
bool IsResolved) const {
unsigned Kind = Fixup.getKind();
+
+ // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT
+ // and .word relocations they put the Thumb bit into the addend if possible.
+ // Other relocation types don't want this bit though (branches couldn't encode
+ // it if it *was* present, and no other relocations exist) and it can
+ // interfere with checking valid expressions.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) &&
+ (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 ||
+ Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 ||
+ Kind == ARM::fixup_t2_movt_hi16))
+ Value |= 1;
+ }
+
switch (Kind) {
default:
Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
@@ -505,6 +521,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_thumb_bl: {
+ // FIXME: We get both thumb1 and thumb2 in here, so we can only check for
+ // the less strict thumb2 value.
+ if (!isInt<26>(Value - 4)) {
+ Ctx.reportError(Fixup.getLoc(), "Relocation out of range");
+ return 0;
+ }
+
// The value doesn't encode the low bit (always zero) and is offset by
// four. The 32-bit immediate value is encoded as
// imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
@@ -716,29 +739,11 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) {
+ const MCValue &Target, bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
const unsigned FixupKind = Fixup.getKind() ;
- // MachO (the only user of "Value") tries to make .o files that look vaguely
- // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
- // into the addend if possible. Other relocation types don't want this bit
- // though (branches couldn't encode it if it *was* present, and no other
- // relocations exist) and it can interfere with checking valid expressions.
- if (FixupKind == FK_Data_4 ||
- FixupKind == ARM::fixup_arm_movw_lo16 ||
- FixupKind == ARM::fixup_arm_movt_hi16 ||
- FixupKind == ARM::fixup_t2_movw_lo16 ||
- FixupKind == ARM::fixup_t2_movt_hi16) {
- if (Sym) {
- if (Asm.isThumbFunc(Sym))
- Value |= 1;
- }
- }
if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
assert(Sym && "How did we resolve this?");
@@ -747,7 +752,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// If the symbol is out of range, produce a relocation and hope the
// linker can handle it. GNU AS produces an error in this case.
- if (Sym->isExternal() || Value >= 0x400004)
+ if (Sym->isExternal())
IsResolved = false;
}
// Create relocations for unconditional branches to function symbols with
@@ -759,6 +764,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
IsResolved = false;
if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br ||
FixupKind == ARM::fixup_arm_thumb_bl ||
+ FixupKind == ARM::fixup_t2_condbranch ||
FixupKind == ARM::fixup_t2_uncondbranch))
IsResolved = false;
}
@@ -875,22 +881,25 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
}
}
-void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value, bool IsPCRel,
- MCContext &Ctx) const {
+void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target,
+ MutableArrayRef<char> Data, uint64_t Value,
+ bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true);
+ MCContext &Ctx = Asm.getContext();
+ Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx,
+ IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+ assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!");
// Used to point to big endian bytes.
unsigned FullSizeBytes;
if (!IsLittleEndian) {
FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
- assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!");
assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 2ddedb5d61059..6a0ba2ed41c1a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -40,17 +40,17 @@ public:
/// processFixupValue - Target hook to process the literal value of a fixup
/// if necessary.
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
+ void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, bool &IsResolved) override;
- unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, uint64_t Value, bool IsPCRel,
MCContext &Ctx, bool IsLittleEndian,
bool IsResolved) const;
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+ void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target, MutableArrayRef<char> Data,
+ uint64_t Value, bool IsPCRel) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 00505a103e00f..f74fb2e20b5a3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -33,8 +33,8 @@ public:
~ARMWinCOFFObjectWriter() override = default;
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
const MCAsmBackend &MAB) const override;
bool recordRelocation(const MCFixup &) const override;
@@ -42,7 +42,8 @@ public:
} // end anonymous namespace
-unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
const MCAsmBackend &MAB) const {