summary | refs | log | tree | commit | diff
path: root/lib/Target/AArch64
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-07-01 13:22:02 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-07-01 13:22:02 +0000
commit: 9df3605dea17e84f8183581f6103bd0c79e2a606 (patch)
tree: 70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Target/AArch64
parent: 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff)
Diffstat (limited to 'lib/Target/AArch64')
-rw-r--r-- lib/Target/AArch64/AArch64CondBrTuning.cpp 7
-rw-r--r-- lib/Target/AArch64/AArch64ConditionalCompares.cpp 48
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp 5
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.td 4
-rw-r--r-- lib/Target/AArch64/AArch64InstructionSelector.cpp 9
-rw-r--r-- lib/Target/AArch64/AArch64LegalizerInfo.cpp 9
-rw-r--r-- lib/Target/AArch64/AArch64MCInstLower.cpp 15
-rw-r--r-- lib/Target/AArch64/AArch64MCInstLower.h 2
-rw-r--r-- lib/Target/AArch64/AArch64RegisterInfo.cpp 2
-rw-r--r-- lib/Target/AArch64/AArch64SchedThunderX2T99.td 1221
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.cpp 4
-rw-r--r-- lib/Target/AArch64/AArch64TargetMachine.h 1
-rw-r--r-- lib/Target/AArch64/AArch64TargetObjectFile.h 3
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.cpp 77
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.h 6
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp 32
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp 123
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h 38
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp 4
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h 5
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp 18
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h 2
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp 65
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp 37
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h 43
-rw-r--r-- lib/Target/AArch64/MCTargetDesc/CMakeLists.txt 2
27 files changed, 1511 insertions, 275 deletions
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp
index f27bc97ec3f3..0a948812ff33 100644
--- a/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -22,7 +22,7 @@
/// cbz w8, .LBB1_2 -> b.eq .LBB1_2
///
/// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses.
-/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2
+/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2
///
//===----------------------------------------------------------------------===//
@@ -129,11 +129,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) {
break;
case AArch64::TBZW:
case AArch64::TBZX:
- CC = AArch64CC::GE;
+ CC = AArch64CC::PL;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
- CC = AArch64CC::LT;
+ CC = AArch64CC::MI;
break;
}
return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc))
@@ -271,6 +271,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI,
}
break;
}
+ (void)NewCmp; (void)NewBr;
assert(NewCmp && NewBr && "Expected new instructions.");
DEBUG(dbgs() << " with instruction:\n ");
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 00a0111f2bd2..9eda56c825a9 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -139,6 +140,7 @@ class SSACCmpConv {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ const MachineBranchProbabilityInfo *MBPI;
public:
/// The first block containing a conditional branch, dominating everything
@@ -186,8 +188,10 @@ private:
public:
/// runOnMachineFunction - Initialize per-function data structures.
- void runOnMachineFunction(MachineFunction &MF) {
+ void runOnMachineFunction(MachineFunction &MF,
+ const MachineBranchProbabilityInfo *MBPI) {
this->MF = &MF;
+ this->MBPI = MBPI;
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
@@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
- Head->removeSuccessor(CmpBB, true);
- CmpBB->removeSuccessor(Tail, true);
+
+ // Save successor probabilities before removing CmpBB and Tail from their
+ // parents.
+ BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
+ BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
+
+ Head->removeSuccessor(CmpBB);
+ CmpBB->removeSuccessor(Tail);
+
+ // If Head and CmpBB had successor probabilities, update the probabilities to
+ // reflect the ccmp-conversion.
+ if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
+
+ // Head is allowed two successors. We've removed CmpBB, so the remaining
+ // successor is Tail. We need to increase the successor probability for
+ // Tail to account for the CmpBB path we removed.
+ //
+ // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
+ assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
+ BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
+ Head->setSuccProbability(Head->succ_begin(),
+ Head2Tail + Head2CmpBB * CmpBB2Tail);
+
+ // We will transfer successors of CmpBB to Head in a moment without
+ // normalizing the successor probabilities. Set the successor probabilities
+ // before doing so.
+ //
+ // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
+ for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
+ BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
+ CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
+ }
+ }
+
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->removeBranch(*Head);
@@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const {
namespace {
class AArch64ConditionalCompares : public MachineFunctionPass {
+ const MachineBranchProbabilityInfo *MBPI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MCSchedModel SchedModel;
@@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
"AArch64 CCMP Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
@@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() {
}
void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
MinSize = MF.getFunction()->optForMinSize();
bool Changed = false;
- CmpConv.runOnMachineFunction(MF);
+ CmpConv.runOnMachineFunction(MF, MBPI);
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
// cmp-conversions from the same head block.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2965106fd270..aaf32a499bc3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7561,8 +7561,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-
+ SubVec = Builder.CreateIntToPtr(
+ SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
+ VecTy->getVectorNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ad24612239fa..6cb723d187af 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -735,7 +735,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
-let AddedComplexity = 7 in {
+let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;
@@ -752,7 +752,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
-} // AddedComplexity = 7
+} // AddedComplexity = 5
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 9bfd570e9a82..07ce0e863c5e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -947,7 +947,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
if (DstRB.getID() != SrcRB.getID()) {
- DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
return false;
}
@@ -964,16 +964,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
- DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
return false;
}
if (DstRC == SrcRC) {
// Nothing to be done
+ } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
+ SrcTy == LLT::scalar(64)) {
+ llvm_unreachable("TableGen can import this case");
+ return false;
} else if (DstRC == &AArch64::GPR32RegClass &&
SrcRC == &AArch64::GPR64RegClass) {
I.getOperand(1).setSubReg(AArch64::sub_32);
} else {
+ DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
return false;
}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 01196817f311..4b568f3fba2b 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
+ for (auto Ty : {p0, s1, s8, s16, s32, s64})
+ setAction({G_IMPLICIT_DEF, Ty}, Legal);
+
for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
@@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// G_INSERT (It seems entirely reasonable that inputs shouldn't overlap).
}
+ for (auto Ty : {s1, s8, s16, s32, s64, p0})
+ setAction({G_EXTRACT, Ty}, Legal);
+
+ for (auto Ty : {s32, s64})
+ setAction({G_EXTRACT, 1, Ty}, Legal);
+
for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 45083df7ab45..f82b9dbc2c9f 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
+MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ return MCOperand::createExpr(Expr);
+}
+
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
if (Printer.TM.getTargetTriple().isOSDarwin())
return lowerSymbolOperandDarwin(MO, Sym);
+ if (Printer.TM.getTargetTriple().isOSBinFormatCOFF())
+ return lowerSymbolOperandCOFF(MO, Sym);
- assert(Printer.TM.getTargetTriple().isOSBinFormatELF() &&
- "Expect Darwin or ELF target");
+ assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target");
return lowerSymbolOperandELF(MO, Sym);
}
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 1e29b80c2d62..aa30fe1fa707 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -42,6 +42,8 @@ public:
MCSymbol *Sym) const;
MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
MCSymbol *Sym) const;
+ MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO,
+ MCSymbol *Sym) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index baf15ac540cf..fab92e139dd0 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "Invalid target");
return CSR_AArch64_TLS_ELF_RegMask;
}
diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 3654eeca530a..10df50bcf156 100644
--- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
- THX2T99P3, THX2T99P4, THX2T99P5]> {
- let BufferSize=60;
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
+ let BufferSize = 60;
}
// Define commonly used write types for InstRW specializations.
// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// 1 cycles on I2.
+def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on I1.
-def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// 23 cycles on I1.
+def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
+}
+
+// 39 cycles on I1.
+def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> {
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
+}
// 1 cycle on I0, I1, or I2.
-def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// 2 cycles on I0, I1, or I2.
+def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+// 4 cycles on I0, I1, or I2.
+def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+// 5 cycles on I0, I1, or I2.
+def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
// 5 cycles on F1.
-def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 7 cycles on F1.
-def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
// 4 cycles on F0 or F1.
-def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
// 5 cycles on F0 or F1.
-def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
// 6 cycles on F0 or F1.
-def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
// 7 cycles on F0 or F1.
-def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
// 8 cycles on F0 or F1.
-def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+// 10 cycles on F0 or F1.
+def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
// 16 cycles on F0 or F1.
def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
+ let NumMicroOps = 3;
let ResourceCycles = [8];
}
// 23 cycles on F0 or F1.
def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
+ let NumMicroOps = 3;
let ResourceCycles = [11];
}
// 1 cycles on LS0 or LS1.
-def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 0;
+}
+
+// 1 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_1Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+// 2 cycles on LS0 or LS1.
+def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
// 4 cycles on LS0 or LS1.
-def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// 5 cycles on LS0 or LS1.
-def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// 6 cycles on LS0 or LS1.
-def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_4Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def THX2T99Write_6Cyc_LS01_I012_I012 :
+def THX2T99Write_5Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+// 6 cycles on LS0 or LS1 and I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
+def THX2T99Write_6Cyc_LS01_I012_I012 :
SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
@@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
// 5 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
- let NumMicroOps = 2;
+ let NumMicroOps = 3;
}
// Define commonly used read types.
@@ -195,10 +327,8 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
-
}
-
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Branch, register
+// Branch and link, register != LR
+// Branch and link, register = LR
+def : WriteRes<WriteBrReg, [THX2T99I2]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint, []> { let Latency = 1; }
-def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteAtomic, []> {
+ let Unsupported = 1;
+ let NumMicroOps = 2;
+}
-// Branch, register
-// Branch and link, register != LR
-// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
+//---
+// Branch
+//---
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>;
+def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>;
+def : InstRW<[THX2T99Write_1Cyc_I2],
+ (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---
+
// ALU, basic
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[WriteI],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
- let ResourceCycles = [2];
+ let ResourceCycles = [2, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteISReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
def : WriteRes<WriteIEReg, [THX2T99I012]> {
- let Latency = 2;
- let ResourceCycles = [2];
+ let Latency = 1;
+ let ResourceCycles = [1, 3];
+ let NumMicroOps = 2;
}
+def : InstRW<[WriteIEReg],
+ (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
+ "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
+ "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)",
+ "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
+ "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
+ "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
+ "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)",
+ "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)",
+ "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)",
+ "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)",
+ "CSNEG?(W|X)r(i|r|s|x)")>;
+
// Move immed
-def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
// Variable shift
-def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
//---
// 3.4 Divide and Multiply Instructions
//---
// Divide, W-form
-// Latency range of 13-23. Take the average.
+// Latency range of 13-23/13-39.
def : WriteRes<WriteID32, [THX2T99I1]> {
- let Latency = 18;
- let ResourceCycles = [18];
+ let Latency = 39;
+ let ResourceCycles = [13, 39];
+ let NumMicroOps = 4;
}
// Divide, X-form
-// Latency range of 13-39. Take the average.
def : WriteRes<WriteID64, [THX2T99I1]> {
- let Latency = 26;
- let ResourceCycles = [26];
+ let Latency = 23;
+ let ResourceCycles = [13, 23];
+ let NumMicroOps = 4;
}
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
+//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012],
+// (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
+def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
+def : InstRW<[THX2T99Write_5Cyc_I012],
+ (instregex "(S|U)(MADDL|MSUBL)rrr")>;
+
+def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
+def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Multiply high
+def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>;
+
+// Miscellaneous Data-Processing Instructions
+// Bitfield extract
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move - basic
+def : InstRW<[THX2T99Write_1Cyc_I012],
+ (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
-// Bitfield move, basic
// Bitfield move, insert
-// NOTE: Handled by WriteIS.
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>;
+def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>;
// Count leading
def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
- "^CLZ(W|X)r$")>;
+ "^CLZ(W|X)r$")>;
+
+// Reverse bits
+def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>;
+
+// Cryptography Extensions
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>;
+
+// CRC Instructions
+// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>;
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>;
+
+def : InstRW<[THX2T99Write_4Cyc_I1],
+ (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>;
// Reverse bits/bytes
// NOTE: Handled by WriteI.
//---
-// 3.6 Load Instructions
+// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---
@@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+// Load pair, immed offset, normal
+// Load pair, immed offset, signed words, base != SP
+// Load pair, immed offset signed words, base = SP
+// LDP only breaks into *one* LS micro-op. Thus
+// the resources are handled by WriteLD.
+def : WriteRes<WriteLDHi, []> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+}
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<NoSchedPred, [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
-// Load pair, immed offset, normal
-// Load pair, immed offset, signed words, base != SP
-// Load pair, immed offset signed words, base = SP
-// LDP only breaks into *one* LS micro-op. Thus
-// the resources are handling by WriteLD.
-def : WriteRes<WriteLDHi, []> {
- let Latency = 5;
-}
-
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>; // load pair, pre-index: one entry per register form (D, Q, S, W, X)
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>; // X form; the W form is already covered by the entry above
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr],
+ (instrs LDPXpost)>;
+
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroW)>; // NOTE(review): identical to the LDRBroW entry directly above — looks redundant, confirm
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroW)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRBroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRDroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRHHroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRQroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRSHXroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRWroX)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs LDRXroX)>;
+
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>;
+def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>;
+
+//---
+// Prefetch
+//---
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>;
+
//--
-// 3.7 Store Instructions
+// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--
@@ -382,6 +863,195 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>;
+
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; // NOTE(review): every ST*ui opcode in this run is mapped twice (_I012_I012, then _I012) — confirm which write is intended
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPDpre, STPDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPQpre, STPQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPSpre, STPSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPWpre, STPWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STPXpre, STPXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STPXpre, STPXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBpre, STRBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBpre, STRBBpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDpre, STRDpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHpre, STRHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHpre, STRHHpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQpre, STRQpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSpre, STRSpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWpre, STRWpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012],
+ (instrs STRXpre, STRXpost)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXpre, STRXpost)>;
+
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBroW, STRBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRBBroW, STRBBroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRDroW, STRDroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHroW, STRHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRHHroW, STRHHroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRQroW, STRQroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRSroW, STRSroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRWroW, STRWroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase],
+ (instrs STRXroW, STRXroX)>;
+
//---
// 3.8 FP Data Processing Instructions
//---
@@ -389,28 +1059,95 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> { // basic FP write, issued on THX2T99F01
+ let Latency = 5; // result latency, in cycles
+ let NumMicroOps = 2; // micro-ops accounted to one WriteF
+}
// FP arithmetic
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> { // FP compare write, issued on THX2T99F01
+ let Latency = 5; // result latency, in cycles
+ let NumMicroOps = 2; // micro-ops accounted to one WriteFCmp
+}
-// FP divide, S-form
-// FP square root, S-form
-def : WriteRes<WriteFDiv, [THX2T99F01]> {
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFDiv, [THX2T99F01]> { // generic FP divide write; specific opcodes are remapped by InstRW below
+ let Latency = 22; // result latency, in cycles
+ let ResourceCycles = [19]; // cycles THX2T99F01 stays reserved
+}
+
+def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { // NOTE(review): no InstRW in the visible hunks references this def — confirm it is used
+ let Latency = 16; // result latency, in cycles
+ let ResourceCycles = [8]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 4; // micro-ops accounted to this write
+}
+
+def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { // single-precision FP divide
let Latency = 16; // result latency, in cycles
let ResourceCycles = [8]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 4; // micro-ops accounted to this write
}
+def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { // double-precision FP divide
+ let Latency = 23; // result latency, in cycles
+ let ResourceCycles = [12]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 4; // micro-ops accounted to this write
+}
+
+def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { // single-precision FP square root
+ let Latency = 16; // result latency, in cycles
+ let ResourceCycles = [8]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 4; // micro-ops accounted to this write
+}
+
+def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { // double-precision FP square root
+ let Latency = 23; // result latency, in cycles
+ let ResourceCycles = [12]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 4; // micro-ops accounted to this write
+}
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>;
+def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>; // NOTE(review): the scalar sqrt opcode mapped above is FSQRTSr, so "^FSQRTSrr" looks like it matches nothing; FDIVSrr is also already mapped above — confirm
+
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>;
+def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>; // NOTE(review): the scalar sqrt opcode mapped above is FSQRTDr, so "^FSQRTDrr" looks like it matches nothing; FDIVDrr is also already mapped above — confirm
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> { // generic FP multiply write, issued on THX2T99F01
+ let Latency = 6; // result latency, in cycles
+ let ResourceCycles = [2]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 3; // micro-ops accounted to one WriteFMul
+}
+
+def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { // applied below to the "^FMUL" / "^FNMUL" opcodes
+ let Latency = 6; // result latency, in cycles
+ let ResourceCycles = [2]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 3; // micro-ops accounted to this write
+}
+
+def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { // applied below to FMADD/FMSUB/FNMADD/FNMSUB
+ let Latency = 6; // result latency, in cycles
+ let ResourceCycles = [2]; // cycles THX2T99F01 stays reserved
+ let NumMicroOps = 3; // micro-ops accounted to this write
+}
+
+def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>;
+def : InstRW<[THX2T99XWriteFMulAcc],
+ (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>;
// FP round to integral
def : InstRW<[THX2T99Write_7Cyc_F01],
@@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> { // FP convert write, issued on THX2T99F01
+ let Latency = 7; // result latency, in cycles
+ let NumMicroOps = 3; // micro-ops accounted to one WriteFCvt
+}
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> { // FP move (immediate/register) write, issued on THX2T99F01
+ let Latency = 4; // result latency, in cycles
+ let NumMicroOps = 2; // micro-ops accounted to one WriteFImm
+}
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [THX2T99F01]> { // gen<->vec register transfer write, issued on THX2T99F01
+ let Latency = 4; // result latency, in cycles
+ let NumMicroOps = 2; // micro-ops accounted to one WriteFCopy
+}
+
def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
@@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> { // default ASIMD write, issued on THX2T99F01
+ let Latency = 7; // result latency, in cycles
+ let NumMicroOps = 4; // micro-ops accounted to one WriteV
+ let ResourceCycles = [4]; // one entry per listed resource; only THX2T99F01 is listed
+}
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[THX2T99Write_5Cyc_F01],
- (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv",
+ "^ORRv", "^ORNv", "^NOTv")>;
+// ASIMD arith, reduce
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>;
+
+// ASIMD absolute diff accum, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
+// ASIMD absolute diff accum, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
+// ASIMD absolute diff accum long
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]ABAL")>;
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
+// ASIMD arith, reduce, 8B
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
+// ASIMD arith, reduce, 16B
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU]?ADDL?Vv16i8v$")>;
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
+// ASIMD max/min, reduce, 16B
+def : InstRW<[THX2T99Write_10Cyc_F01],
+ (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
+// ASIMD multiply, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)" #
+ "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
+ "(_indexed)?$")>;
+// ASIMD multiply, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
+// ASIMD multiply accumulate, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
+// ASIMD shift accumulate
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv",
+ "SQSHRNv","SQSHRUNv", "UQRSHRNv",
+ "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>;
+// ASIMD shift by immed, complex
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>;
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
+// ASIMD shift by register, complex, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL" #
+ "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD Arithmetic
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
+ "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
+ "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv",
+ "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
+ "^SRHADD", "^SUBHNv", "^SUQADD",
+ "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^CMEQv","^CMGEv","^CMGTv",
+ "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv",
+ "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>;
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>;
//---
// 3.13 ASIMD Floating-point Instructions
@@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith,pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
@@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, Q-form
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
- "^FCMGTv", "^FCMLEv",
- "^FCMLTv")>;
+ "^FCMGTv", "^FCMLEv",
+ "^FCMLTv")>;
+
+// ASIMD FP round, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v2f32)")>;
+// ASIMD FP round, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
// ASIMD FP convert, long
// ASIMD FP convert, narrow
@@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// ASIMD FP convert, other, Q-form
// NOTE: Handled by WriteV.
+// ASIMD FP convert, long and narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>;
+// ASIMD FP convert, other, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
+// ASIMD FP convert, other, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
+
// ASIMD FP divide, D-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>;
// ASIMD FP divide, Q-form, F32
def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>;
// ASIMD FP divide, Q-form, F64
def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
@@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
+def : InstRW<[THX2T99Write_6Cyc_F01],
+ (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP negate
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
-// ASIMD FP round, D-form
-// ASIMD FP round, Q-form
-// NOTE: Handled by WriteV.
-
//--
// 3.14 ASIMD Miscellaneous Instructions
//--
@@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>;
// ASIMD extract
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>;
+
// ASIMD extract narrow, saturating
-// NOTE: Handled by WriteV.
+def : InstRW<[THX2T99Write_7Cyc_F01],
+ (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
// ASIMD insert, element to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
+// ASIMD transfer, element to gen reg
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
+
// ASIMD move, integer immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
+// ASIMD table lookup, D-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>;
+
+// ASIMD table lookup, Q-form
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>;
+
+// ASIMD transpose
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>;
+
+// ASIMD unzip/zip
+def : InstRW<[THX2T99Write_5Cyc_F01],
+ (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
+
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
- "^FRSQRTEv", "^URSQRTEv")>;
+ "^FRSQRTEv", "^URSQRTEv")>;
// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
@@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[THX2T99Write_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
@@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01],
def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
// ASIMD transfer, element to word or word
-def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>;
// ASIMD transfer gen reg to element
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
- "^UZP1v", "^UZP2v")>;
+ "^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
-// 3.15 ASIMD Load Instructions
+// 3.15 ASIMD Load Instructions
//--
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[THX2T99Write_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
-//--
-// 3.17 Cryptography Extensions
-//--
-
-// Crypto AES ops
-def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
-
-// Crypto polynomial (64x64) multiply long
-def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
-
-// Crypto SHA1 xor ops
-// Crypto SHA1 schedule acceleration ops
-// Crypto SHA256 schedule acceleration op (1 u-op)
-// Crypto SHA256 schedule acceleration op (2 u-ops)
-// Crypto SHA256 hash acceleration ops
-def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
-
-//--
-// 3.18 CRC
-//--
-
-// CRC checksum ops
-def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-
} // SchedModel = ThunderX2T99Model
+
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 6660f0babb8a..1252f9403812 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -167,6 +167,8 @@ extern "C" void LLVMInitializeAArch64Target() {
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
return llvm::make_unique<AArch64_MachoTargetObjectFile>();
+ if (TT.isOSBinFormatCOFF())
+ return llvm::make_unique<AArch64_COFFTargetObjectFile>();
return llvm::make_unique<AArch64_ELFTargetObjectFile>();
}
@@ -179,6 +181,8 @@ static std::string computeDataLayout(const Triple &TT,
return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (TT.isOSBinFormatCOFF())
+ return "e-m:w-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 2c75a3258c1c..fefa7e26b79f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -36,6 +36,7 @@ public:
~AArch64TargetMachine() override;
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
+ const AArch64Subtarget *getSubtargetImpl() const = delete;
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 47e3bce43f6e..9077eb7902fd 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -45,6 +45,9 @@ public:
const TargetMachine &TM) const override;
};
+/// This implementation is used for AArch64 COFF targets.
+class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {};
+
} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4328682b93c..a76f080530bb 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -20,6 +20,23 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
+static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
+ cl::init(true), cl::Hidden);
+
+bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // Inline a callee if its target-features are a subset of the caller's
+ // target-features.
+ return (CallerBits & CalleeBits) == CalleeBits;
+}
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
@@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return ST->getMaxInterleaveFactor();
}
-void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
+// For Falkor, we want to avoid having too many strided loads in a loop since
+// that can exhaust the HW prefetcher resources. We adjust the unroller
+// MaxCount preference below to attempt to ensure unrolling doesn't create too
+// many strided loads.
+static void
+getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ enum { MaxStridedLoads = 7 };
+ auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
+ int StridedLoads = 0;
+ // FIXME? We could make this more precise by looking at the CFG and
+ // e.g. not counting loads in each side of an if-then-else diamond.
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ LoadInst *LMemI = dyn_cast<LoadInst>(&I);
+ if (!LMemI)
+ continue;
+
+ Value *PtrValue = LMemI->getPointerOperand();
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ // FIXME? We could take pairing of unrolled load copies into account
+ // by looking at the AddRec, but we would probably have to limit this
+ // to loops with no stores or other memory optimization barriers.
+ ++StridedLoads;
+ // We've seen enough strided loads that seeing more won't make a
+ // difference.
+ if (StridedLoads > MaxStridedLoads / 2)
+ return StridedLoads;
+ }
+ }
+ return StridedLoads;
+ };
+
+ int StridedLoads = countStridedLoads(L, SE);
+ DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
+ << " strided loads\n");
+ // Pick the largest power of 2 unroll count that won't result in too many
+ // strided loads.
+ if (StridedLoads) {
+ UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
+ DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount
+ << '\n');
+ }
+}
+
+void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Enable partial unrolling and runtime unrolling.
- BaseT::getUnrollingPreferences(L, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP);
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
@@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
+
+ if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
+ EnableFalkorHWPFUnrollFix)
+ getFalkorUnrollingPreferences(L, SE, UP);
}
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 290a1ca1f24b..31c037354925 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -51,6 +51,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -119,7 +122,8 @@ public:
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
- void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP);
Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 3d075018904c..475f91016840 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -541,14 +541,13 @@ public:
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
- void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup,
- const MCValue &Target, bool &IsResolved) override;
+ bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
+ const MCValue &Target) override;
};
-void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCFixup &Fixup,
- const MCValue &Target,
- bool &IsResolved) {
+bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm,
+ const MCFixup &Fixup,
+ const MCValue &Target) {
// The ADRP instruction adds some multiple of 0x1000 to the current PC &
// ~0xfff. This means that the required offset to reach a symbol can vary by
// up to one step depending on where the ADRP is in memory. For example:
@@ -562,11 +561,24 @@ void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
// section isn't 0x1000-aligned, we therefore need to delegate this decision
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
- IsResolved = false;
+ return true;
+ return false;
}
}
+namespace {
+class COFFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple)
+ : AArch64AsmBackend(T, /*IsLittleEndian*/true) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createAArch64WinCOFFObjectWriter(OS);
+ }
+};
+}
+
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
@@ -575,7 +587,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, MRI);
- assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
+ if (TheTriple.isOSBinFormatCOFF())
+ return new COFFAArch64AsmBackend(T, TheTriple);
+
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
+
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
bool IsILP32 = Options.getABIName() == "ilp32";
return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index f7dda92fb551..89c3e5b4c76e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
/*HasRelocationAddend*/ true),
IsILP32(IsILP32) {}
-#define R_CLS(rtype) \
- IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
-#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
- "supported (LP64 eqv: " #lp64rtype ")"
+#define R_CLS(rtype) \
+ IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
+#define BAD_ILP32_MOV(lp64rtype) \
+ "ILP32 absolute MOV relocation not " \
+ "supported (LP64 eqv: " #lp64rtype ")"
// assumes IsILP32 is true
static bool isNonILP32reloc(const MCFixup &Fixup,
@@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup,
MCContext &Ctx) {
if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw)
return false;
- switch(RefKind) {
- case AArch64MCExpr::VK_ABS_G3:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
- return true;
- case AArch64MCExpr::VK_ABS_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
- return true;
- case AArch64MCExpr::VK_ABS_G2_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
- return true;
- case AArch64MCExpr::VK_ABS_G1_S:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
- return true;
- case AArch64MCExpr::VK_ABS_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
- return true;
- case AArch64MCExpr::VK_DTPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
- return true;
- case AArch64MCExpr::VK_DTPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_TPREL_G2:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
- return true;
- case AArch64MCExpr::VK_TPREL_G1_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G1:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
- return true;
- case AArch64MCExpr::VK_GOTTPREL_G0_NC:
- Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
- return true;
- default: return false;
+ switch (RefKind) {
+ case AArch64MCExpr::VK_ABS_G3:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
+ return true;
+ case AArch64MCExpr::VK_ABS_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_DTPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
+ return true;
+ case AArch64MCExpr::VK_TPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
+ return true;
+ case AArch64MCExpr::VK_GOTTPREL_G0_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
+ return true;
+ default:
+ return false;
}
return false;
}
@@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(PREL32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data "
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte PC relative data "
"relocation not supported (LP64 eqv: PREL64)");
return ELF::R_AARCH64_NONE;
} else
@@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
}
} else {
if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
- return ELF::R_AARCH64_NONE;
+ return ELF::R_AARCH64_NONE;
switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
@@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
- Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data "
- "relocation not supported (LP64 eqv: ABS64)");
+ Ctx.reportError(Fixup.getLoc(),
+ "ILP32 8 byte absolute data "
+ "relocation not supported (LP64 eqv: ABS64)");
return ELF::R_AARCH64_NONE;
} else
return ELF::R_AARCH64_ABS64;
@@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte unchecked GOT load/store relocation "
- "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
+ "not supported (ILP32 eqv: LD32_GOT_LO12_NC");
return ELF::R_AARCH64_NONE;
}
}
@@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
"ILP32 4 byte checked GOT load/store relocation "
- "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
+ "not supported (unchecked eqv: LD32_GOT_LO12_NC)");
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte checked GOT load/store relocation "
- "not supported (unchecked/ILP32 eqv: "
- "LD32_GOT_LO12_NC)");
+ "not supported (unchecked/ILP32 eqv: "
+ "LD32_GOT_LO12_NC)");
}
return ELF::R_AARCH64_NONE;
}
@@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsILP32) {
return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC;
} else {
- Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store "
+ Ctx.reportError(Fixup.getLoc(),
+ "LP64 32-bit load/store "
"relocation not supported (ILP32 eqv: "
"TLSIE_LD32_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
@@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
} else {
Ctx.reportError(Fixup.getLoc(),
"LP64 4 byte TLSDESC load/store relocation "
- "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
+ "not supported (ILP32 eqv: TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 32-bit load/store instruction "
- "fixup_aarch64_ldst_imm12_scale4");
+ "fixup_aarch64_ldst_imm12_scale4");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
@@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_LD64_GOT_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "LD64_GOT_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "LD64_GOT_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSIE_LD64_GOTTPREL_LO12_NC)");
+ "relocation not supported (LP64 eqv: "
+ "TLSIE_LD64_GOTTPREL_LO12_NC)");
return ELF::R_AARCH64_NONE;
}
}
@@ -340,8 +345,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AARCH64_TLSDESC_LD64_LO12;
} else {
Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store "
- "relocation not supported (LP64 eqv: "
- "TLSDESC_LD64_LO12)");
+ "relocation not supported (LP64 eqv: "
+ "TLSDESC_LD64_LO12)");
return ELF::R_AARCH64_NONE;
}
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 031aa8b81e35..a0de3c39562b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
+#include "AArch64WinCOFFStreamer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
@@ -30,6 +31,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSBinFormatELF())
return new AArch64TargetELFStreamer(S);
+ if (TT.isOSBinFormatCOFF())
+ return new AArch64TargetWinCOFFStreamer(S);
return nullptr;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index 0f5b765c7697..4293dcba955e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -16,53 +16,47 @@ namespace llvm {
namespace AArch64 {
enum Fixups {
- // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADR instruction.
+ // A 21-bit pc-relative immediate inserted into an ADR instruction.
fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind,
- // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADRP instruction.
+ // A 21-bit pc-relative immediate inserted into an ADRP instruction.
fixup_aarch64_pcrel_adrp_imm21,
- // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions.
- // No alignment adjustment. All value bits are encoded.
+ // 12-bit fixup for add/sub instructions. No alignment adjustment. All value
+ // bits are encoded.
fixup_aarch64_add_imm12,
- // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and
- // store instructions.
+ // Unsigned 12-bit fixups for load and store instructions.
fixup_aarch64_ldst_imm12_scale1,
fixup_aarch64_ldst_imm12_scale2,
fixup_aarch64_ldst_imm12_scale4,
fixup_aarch64_ldst_imm12_scale8,
fixup_aarch64_ldst_imm12_scale16,
- // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by
- // pc-relative loads and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and
+ // generates relocations directly when necessary.
fixup_aarch64_ldr_pcrel_imm19,
// FIXME: comment
fixup_aarch64_movw,
- // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative
- // immediate.
+ // The high 14 bits of a 16-bit pc-relative immediate.
fixup_aarch64_pcrel_branch14,
- // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative
- // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by
- // b.cc and generates relocations directly when necessary.
+ // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as
+ // fixup_aarch64_pcrel_adrhi, except this is used by b.cc and generates
+ // relocations directly when necessary.
fixup_aarch64_pcrel_branch19,
- // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
- // immediate.
+ // The high 26 bits of a 28-bit pc-relative immediate.
fixup_aarch64_pcrel_branch26,
- // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
- // immediate. Distinguished from branch26 only on ELF.
+ // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from
+ // branch26 only on ELF.
fixup_aarch64_pcrel_call26,
- // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF
- // R_AARCH64_TLSDESC_CALL relocation.
+ // Zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation.
fixup_aarch64_tlsdesc_call,
// Marker
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 1b28df963b40..fc808ee0cdd6 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -100,3 +100,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
HasIdentDirective = true;
}
+
+AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
+ CommentString = ";";
+}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 253cd30f26ee..2d7107a37244 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H
+#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
#include "llvm/MC/MCAsmInfoELF.h"
@@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF {
explicit AArch64MCAsmInfoELF(const Triple &T);
};
+struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF {
+ explicit AArch64MCAsmInfoCOFF();
+};
+
} // namespace llvm
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index f710065d9bc7..a2555496cdb9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "AArch64MCTargetDesc.h"
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
+#include "AArch64WinCOFFStreamer.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
MAI = new AArch64MCAsmInfoDarwin();
+ else if (TheTriple.isOSBinFormatCOFF())
+ MAI = new AArch64MCAsmInfoCOFF();
else {
- assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
+ assert(TheTriple.isOSBinFormatELF() && "Invalid target");
MAI = new AArch64MCAsmInfoELF(TheTriple);
}
@@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
CodeModel::Model &CM) {
- assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) &&
- "Only expect Darwin and ELF targets");
+ assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
+ TT.isOSBinFormatCOFF()) && "Invalid target");
if (CM == CodeModel::Default)
CM = CodeModel::Small;
@@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
/*LabelSections*/ true);
}
+static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ IncrementalLinkerCompatible);
+}
+
static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
return new MCInstrAnalysis(Info);
}
@@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the obj streamers.
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer);
// Register the obj target streamer.
TargetRegistry::RegisterObjectTargetStreamer(
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 615d7dab2c51..1404926b8124 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS);
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..7862a03e771c
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ AArch64WinCOFFObjectWriter()
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) {
+ }
+
+ ~AArch64WinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+} // end anonymous namespace
+
+unsigned
+AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const {
+ const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind());
+ report_fatal_error(Twine("unsupported relocation type: ") + Info.Name);
+}
+
+bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return true;
+}
+
+namespace llvm {
+
+MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) {
+ MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter();
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
new file mode 100644
index 000000000000..6c8da27e398f
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
@@ -0,0 +1,37 @@
+//===-- AArch64WinCOFFStreamer.cpp - AArch64 Target WinCOFF Streamer -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64WinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+
+class AArch64WinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ friend class AArch64TargetWinCOFFStreamer;
+
+ AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
+};
+} // end anonymous namespace
+
+namespace llvm {
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
+ auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
+}
+
+} // end llvm namespace
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
new file mode 100644
index 000000000000..1b4fcd6804e2
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h
@@ -0,0 +1,43 @@
+//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements WinCOFF streamer information for the AArch64 backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H
+
+#include "AArch64TargetStreamer.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+namespace {
+class AArch64WinCOFFStreamer;
+
+class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer {
+private:
+ AArch64WinCOFFStreamer &getStreamer();
+
+public:
+ AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S)
+ : AArch64TargetStreamer(S) {}
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+MCWinCOFFStreamer
+*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
+} // end llvm namespace
+
+#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 6d8be5e63fbb..56eeba8a1d4b 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
AArch64TargetStreamer.cpp
+ AArch64WinCOFFObjectWriter.cpp
+ AArch64WinCOFFStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)