diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
| commit | 9df3605dea17e84f8183581f6103bd0c79e2a606 (patch) | |
| tree | 70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Target/AArch64 | |
| parent | 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff) | |
Diffstat (limited to 'lib/Target/AArch64')
27 files changed, 1511 insertions, 275 deletions
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index f27bc97ec3f3..0a948812ff33 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -22,7 +22,7 @@ /// cbz w8, .LBB1_2 -> b.eq .LBB1_2 /// /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. -/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2 +/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2 /// //===----------------------------------------------------------------------===// @@ -129,11 +129,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { break; case AArch64::TBZW: case AArch64::TBZX: - CC = AArch64CC::GE; + CC = AArch64CC::PL; break; case AArch64::TBNZW: case AArch64::TBNZX: - CC = AArch64CC::LT; + CC = AArch64CC::MI; break; } return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) @@ -271,6 +271,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, } break; } + (void)NewCmp; (void)NewBr; assert(NewCmp && NewBr && "Expected new instructions."); DEBUG(dbgs() << " with instruction:\n "); diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 00a0111f2bd2..9eda56c825a9 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -139,6 +140,7 @@ class SSACCmpConv { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + const MachineBranchProbabilityInfo *MBPI; public: /// The first block containing a conditional branch, dominating everything @@ -186,8 +188,10 @@ private: public: /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { + void runOnMachineFunction(MachineFunction &MF, + const MachineBranchProbabilityInfo *MBPI) { this->MF = &MF; + this->MBPI = MBPI; TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB, true); - CmpBB->removeSuccessor(Tail, true); + + // Save successor probabilties before removing CmpBB and Tail from their + // parents. + BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB); + BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail); + + Head->removeSuccessor(CmpBB); + CmpBB->removeSuccessor(Tail); + + // If Head and CmpBB had successor probabilties, udpate the probabilities to + // reflect the ccmp-conversion. + if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) { + + // Head is allowed two successors. We've removed CmpBB, so the remaining + // successor is Tail. We need to increase the successor probability for + // Tail to account for the CmpBB path we removed. + // + // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB). + assert(*Head->succ_begin() == Tail && "Head successor is not Tail"); + BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail); + Head->setSuccProbability(Head->succ_begin(), + Head2Tail + Head2CmpBB * CmpBB2Tail); + + // We will transfer successors of CmpBB to Head in a moment without + // normalizing the successor probabilities. Set the successor probabilites + // before doing so. + // + // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB). + for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) { + BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I); + CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I); + } + } + Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->removeBranch(*Head); @@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const { namespace { class AArch64ConditionalCompares : public MachineFunctionPass { + const MachineBranchProbabilityInfo *MBPI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0; INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", @@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() { } void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; - CmpConv.runOnMachineFunction(MF); + CmpConv.runOnMachineFunction(MF, MBPI); // Visit blocks in dominator tree pre-order. The pre-order enables multiple // cmp-conversions from the same head block. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2965106fd270..aaf32a499bc3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7561,8 +7561,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); - + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SVI->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SVI].push_back(SubVec); } } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ad24612239fa..6cb723d187af 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -735,7 +735,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>; def : ShiftAlias<"rorv", RORVXr, GPR64>; // Multiply-add -let AddedComplexity = 7 in { +let AddedComplexity = 5 in { defm MADD : MulAccum<0, "madd", add>; defm MSUB : MulAccum<1, "msub", sub>; @@ -752,7 +752,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 +} // AddedComplexity = 5 let AddedComplexity = 5 in { def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 9bfd570e9a82..07ce0e863c5e 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -947,7 +947,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); if (DstRB.getID() != SrcRB.getID()) { - DEBUG(dbgs() << "G_TRUNC input/output on different banks\n"); + DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); return false; } @@ -964,16 +964,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { - DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); return false; } if (DstRC == SrcRC) { // Nothing to be done + } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && + SrcTy == LLT::scalar(64)) { + llvm_unreachable("TableGen can import this case"); + return false; } else if (DstRC == &AArch64::GPR32RegClass && SrcRC == &AArch64::GPR64RegClass) { I.getOperand(1).setSubReg(AArch64::sub_32); } else { + DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); return false; } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 01196817f311..4b568f3fba2b 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); + for (auto Ty : {p0, s1, s8, s16, s32, s64}) + setAction({G_IMPLICIT_DEF, Ty}, Legal); + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) { // These operations naturally get the right answer when used on // GPR32, even if the actual type is narrower. @@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap). } + for (auto Ty : {s1, s8, s16, s32, s64, p0}) + setAction({G_EXTRACT, Ty}, Legal); + + for (auto Ty : {s32, s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, s64, p0, v2s32}) setAction({MemOp, Ty}, Legal); diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 45083df7ab45..f82b9dbc2c9f 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, return MCOperand::createExpr(Expr); } +MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const { + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + return MCOperand::createExpr(Expr); +} + MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { if (Printer.TM.getTargetTriple().isOSDarwin()) return lowerSymbolOperandDarwin(MO, Sym); + if (Printer.TM.getTargetTriple().isOSBinFormatCOFF()) + return lowerSymbolOperandCOFF(MO, Sym); - assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && - "Expect Darwin or ELF target"); + assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target"); return lowerSymbolOperandELF(MO, Sym); } diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h index 1e29b80c2d62..aa30fe1fa707 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.h +++ b/lib/Target/AArch64/AArch64MCInstLower.h @@ -42,6 +42,8 @@ public: MCSymbol *Sym) const; MCOperand lowerSymbolOperandELF(const MachineOperand &MO, MCSymbol *Sym) const; + MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index baf15ac540cf..fab92e139dd0 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "Invalid target"); return CSR_AArch64_TLS_ELF_RegMask; } diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 3654eeca530a..10df50bcf156 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -1,4 +1,4 @@ -//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; // 60 entry unified scheduler. def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, - THX2T99P3, THX2T99P4, THX2T99P5]> { - let BufferSize=60; + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize = 60; } // Define commonly used write types for InstRW specializations. // All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>. // 3 cycles on I1. -def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 1 cycles on I2. +def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on I1. -def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 23 cycles on I1. +def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} // 1 cycle on I0, I1, or I2. -def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0, I1, or I2. +def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on I0, I1, or I2. +def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on I0, I1, or I2. +def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} // 5 cycles on F1. -def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 5; + let NumMicroOps = 2; +} // 7 cycles on F1. -def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 7; + let NumMicroOps = 2; +} // 4 cycles on F0 or F1. -def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} // 5 cycles on F0 or F1. -def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} // 6 cycles on F0 or F1. -def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} // 7 cycles on F0 or F1. -def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} // 8 cycles on F0 or F1. -def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0 or F1. +def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 10; + let NumMicroOps = 3; +} // 16 cycles on F0 or F1. def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 16; + let NumMicroOps = 3; let ResourceCycles = [8]; } // 23 cycles on F0 or F1. def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 23; + let NumMicroOps = 3; let ResourceCycles = [11]; } // 1 cycles on LS0 or LS1. -def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 0; +} + +// 1 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 2; +} + +// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 3; +} + +// 2 cycles on LS0 or LS1. +def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on LS0 or LS1. -def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} // 5 cycles on LS0 or LS1. -def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 5; + let NumMicroOps = 3; +} // 6 cycles on LS0 or LS1. -def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} // 5 cycles on LS0 or LS1 and I0, I1, or I2. def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_6Cyc_LS01_I012_I012 : +def THX2T99Write_5Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { let Latency = 6; let NumMicroOps = 3; @@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { // 5 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 6 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 6; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 7 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 7; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 8 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 8; - let NumMicroOps = 2; + let NumMicroOps = 3; } // Define commonly used read types. @@ -195,10 +327,8 @@ def : ReadAdvance<ReadID, 0>; def : ReadAdvance<ReadExtrHi, 0>; def : ReadAdvance<ReadAdrBase, 0>; def : ReadAdvance<ReadVLD, 0>; - } - //===----------------------------------------------------------------------===// // 3. Instruction Tables. @@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in { // Branch, immed // Branch and link, immed // Compare and branch -def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; } +def : WriteRes<WriteBr, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes<WriteBrReg, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} def : WriteRes<WriteSys, []> { let Latency = 1; } def : WriteRes<WriteBarrier, []> { let Latency = 1; } def : WriteRes<WriteHint, []> { let Latency = 1; } -def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteAtomic, []> { + let Unsupported = 1; + let NumMicroOps = 2; +} -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; } +//--- +// Branch +//--- +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>; +def : InstRW<[THX2T99Write_1Cyc_I2], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; //--- // 3.2 Arithmetic and Logical Instructions // 3.3 Move and Shift Instructions //--- + // ALU, basic // Conditional compare // Conditional select // Address generation -def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteI, [THX2T99I012]> { + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : InstRW<[WriteI], (instrs COPY)>; // ALU, extend and/or shift def : WriteRes<WriteISReg, [THX2T99I012]> { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [2, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : WriteRes<WriteIEReg, [THX2T99I012]> { - let Latency = 2; - let ResourceCycles = [2]; + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + // Move immed -def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteImm, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; // Variable shift -def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteIS, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} //--- // 3.4 Divide and Multiply Instructions //--- // Divide, W-form -// Latency range of 13-23. Take the average. +// Latency range of 13-23/13-39. def : WriteRes<WriteID32, [THX2T99I1]> { - let Latency = 18; - let ResourceCycles = [18]; + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; } // Divide, X-form -// Latency range of 13-39. Take the average. def : WriteRes<WriteID64, [THX2T99I1]> { - let Latency = 26; - let ResourceCycles = [26]; + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; } // Multiply accumulate, W-form -def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; } +def : WriteRes<WriteIM32, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} // Multiply accumulate, X-form -def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; } +def : WriteRes<WriteIM64, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX2T99Write_5Cyc_I012], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; // Bitfield extract, two reg -def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteExtr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; -// Bitfield move, basic // Bitfield move, insert -// NOTE: Handled by WriteIS. +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; // Count leading def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; // Reverse bits/bytes // NOTE: Handled by WriteI. //--- -// 3.6 Load Instructions +// 3.6 Load Instructions // 3.10 FP Load Instructions //--- @@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", // Load register, unscaled immed // Load register, immed unprivileged // Load register, unsigned immed -def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; } +def : WriteRes<WriteLD, [THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} // Load register, immed post-index // NOTE: Handled by WriteLD, WriteI. // Load register, immed pre-index // NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteAdr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. +def : WriteRes<WriteLDHi, []> { + let Latency = 5; + let NumMicroOps = 5; +} // Load register offset, basic // Load register, register offset, scale by 4/8 @@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[ SchedVar<NoSchedPred, [ReadDefault]>]>; def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>; -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handling by WriteLD. -def : WriteRes<WriteLDHi, []> { - let Latency = 5; -} - // Load pair, immed pre-index, normal // Load pair, immed pre-index, signed words // Load pair, immed post-index, normal // Load pair, immed post-index, signed words // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + //-- -// 3.7 Store Instructions +// 3.7 Store Instructions // 3.11 FP Store Instructions //-- @@ -382,6 +863,195 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> { // Store pair, immed pre-index, X-form // NOTE: Handled by WriteAdr, WriteSTP. +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + //--- // 3.8 FP Data Processing Instructions //--- @@ -389,28 +1059,95 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> { // FP absolute value // FP min/max // FP negate -def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; } +def : WriteRes<WriteF, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} // FP arithmetic def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; // FP compare -def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; } +def : WriteRes<WriteFCmp, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} -// FP divide, S-form -// FP square root, S-form -def : WriteRes<WriteFDiv, [THX2T99F01]> { +// FP Mul, Div, Sqrt +def : WriteRes<WriteFDiv, [THX2T99F01]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { let Latency = 16; let ResourceCycles = [8]; + let NumMicroOps = 4; } +def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>; + // FP divide, D-form // FP square root, D-form -def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>; // FP multiply // FP multiply accumulate -def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; } +def : WriteRes<WriteFMul, [THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX2T99XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; // FP round to integral def : InstRW<[THX2T99Write_7Cyc_F01], @@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; // FP convert, from vec to vec reg // FP convert, from gen to vec reg // FP convert, from vec to gen reg -def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; } +def : WriteRes<WriteFCvt, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} // FP move, immed // FP move, register -def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; } +def : WriteRes<WriteFImm, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} // FP transfer, from gen to vec reg // FP transfer, from vec to gen reg -def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; } +def : WriteRes<WriteFCopy, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; //--- @@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; // ASIMD shift by register, basic, Q-form // ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, Q-form -def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; } +def : WriteRes<WriteV, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [4, 23]; +} // ASIMD arith, reduce, 4H/4S // ASIMD arith, reduce, 8B/8H // ASIMD arith, reduce, 16B -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD logical (MOV, MVN, ORN, ORR) -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv", + "^ORRv", "^ORNv", "^NOTv")>; +// ASIMD arith, reduce +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD polynomial (8x8) multiply long -def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; //--- // 3.13 ASIMD Floating-point Instructions @@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; // ASIMD FP arith, normal, D-form // ASIMD FP arith, normal, Q-form -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; // ASIMD FP arith,pairwise, D-form // ASIMD FP arith, pairwise, Q-form @@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; // ASIMD FP compare, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; // ASIMD FP convert, long // ASIMD FP convert, narrow @@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", // ASIMD FP convert, other, Q-form // NOTE: Handled by WriteV. +// ASIMD FP convert, long and narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + // ASIMD FP divide, D-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; // ASIMD FP divide, Q-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; // ASIMD FP divide, Q-form, F64 def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; // ASIMD FP max/min, normal, D-form // ASIMD FP max/min, normal, Q-form @@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", // ASIMD FP multiply, Q-form, FZ // ASIMD FP multiply, Q-form, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP multiply accumulate, Dform, FZ // ASIMD FP multiply accumulate, Dform, no FZ // ASIMD FP multiply accumulate, Qform, FZ // ASIMD FP multiply accumulate, Qform, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP negate def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; -// ASIMD FP round, D-form -// ASIMD FP round, Q-form -// NOTE: Handled by WriteV. - //-- // 3.14 ASIMD Miscellaneous Instructions //-- @@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; // ASIMD bitwise insert, D-form // ASIMD bitwise insert, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^BIFv", "^BITv", "^BSLv")>; // ASIMD count, D-form // ASIMD count, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^CLSv", "^CLZv", "^CNTv")>; // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; // ASIMD extract def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; // ASIMD extract narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; + // ASIMD extract narrow, saturating -// NOTE: Handled by WriteV. +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; // ASIMD insert, element to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + // ASIMD move, integer immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; // ASIMD move, FP immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; +// ASIMD table lookup, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + // ASIMD reciprocal estimate, D-form // ASIMD reciprocal estimate, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; + "^FRSQRTEv", "^URSQRTEv")>; // ASIMD reciprocal step, D-form, FZ // ASIMD reciprocal step, D-form, no FZ @@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; // ASIMD reverse -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^REV16v", "^REV32v", "^REV64v")>; // ASIMD table lookup, D-form @@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; // ASIMD transfer, element to word or word -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; // ASIMD transfer, element to gen reg -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; // ASIMD transfer gen reg to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD transpose def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; + "^UZP1v", "^UZP2v")>; // ASIMD unzip/zip def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; //-- -// 3.15 ASIMD Load Instructions +// 3.15 ASIMD Load Instructions //-- // ASIMD load, 1 element, multiple, 1 reg, D-form // ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form // ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form // ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01], +def : InstRW<[THX2T99Write_5Cyc_LS01], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form // ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_6Cyc_LS01], +def : InstRW<[THX2T99Write_6Cyc_LS01], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D // ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D // ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lone, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_7Cyc_LS01_F01], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_6Cyc_LS01_F01], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //-- @@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], // ASIMD store, 1 element, multiple, 1 reg, D-form // ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form // ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form // ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form // ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H // ASIMD store, 4 element, one lane, S // ASIMD store, 4 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; -//-- -// 3.17 Cryptography Extensions -//-- - -// Crypto AES ops -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; - -// Crypto polynomial (64x64) multiply long -def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; - -// Crypto SHA1 xor ops -// Crypto SHA1 schedule acceleration ops -// Crypto SHA256 schedule acceleration op (1 u-op) -// Crypto SHA256 schedule acceleration op (2 u-ops) -// Crypto SHA256 hash acceleration ops -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; - -//-- -// 3.18 CRC -//-- - -// CRC checksum ops -def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; - } // SchedModel = ThunderX2T99Model + diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 6660f0babb8a..1252f9403812 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -167,6 +167,8 @@ extern "C" void LLVMInitializeAArch64Target() { static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) return llvm::make_unique<AArch64_MachoTargetObjectFile>(); + if (TT.isOSBinFormatCOFF()) + return llvm::make_unique<AArch64_COFFTargetObjectFile>(); return llvm::make_unique<AArch64_ELFTargetObjectFile>(); } @@ -179,6 +181,8 @@ static std::string computeDataLayout(const Triple &TT, return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; if (TT.isOSBinFormatMachO()) return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (TT.isOSBinFormatCOFF()) + return "e-m:w-i64:64-i128:128-n32:64-S128"; if (LittleEndian) return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 2c75a3258c1c..fefa7e26b79f 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -36,6 +36,7 @@ public: ~AArch64TargetMachine() override; const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; + const AArch64Subtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 47e3bce43f6e..9077eb7902fd 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -45,6 +45,9 @@ public: const TargetMachine &TM) const override; }; +/// This implementation is used for AArch64 COFF targets. +class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {}; + } // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4328682b93c..a76f080530bb 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -20,6 +20,23 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" +static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", + cl::init(true), cl::Hidden); + +bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // Inline a callee if its target-features are a subset of the callers + // target-features. + return (CallerBits & CalleeBits) == CalleeBits; +} + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. @@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { return ST->getMaxInterleaveFactor(); } -void AArch64TTIImpl::getUnrollingPreferences(Loop *L, +// For Falkor, we want to avoid having too many strided loads in a loop since +// that can exhaust the HW prefetcher resources. We adjust the unroller +// MaxCount preference below to attempt to ensure unrolling doesn't create too +// many strided loads. +static void +getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TargetTransformInfo::UnrollingPreferences &UP) { + enum { MaxStridedLoads = 7 }; + auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) { + int StridedLoads = 0; + // FIXME? We could make this more precise by looking at the CFG and + // e.g. not counting loads in each side of an if-then-else diamond. + for (const auto BB : L->blocks()) { + for (auto &I : *BB) { + LoadInst *LMemI = dyn_cast<LoadInst>(&I); + if (!LMemI) + continue; + + Value *PtrValue = LMemI->getPointerOperand(); + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE.getSCEV(PtrValue); + const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); + if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) + continue; + + // FIXME? We could take pairing of unrolled load copies into account + // by looking at the AddRec, but we would probably have to limit this + // to loops with no stores or other memory optimization barriers. + ++StridedLoads; + // We've seen enough strided loads that seeing more won't make a + // difference. + if (StridedLoads > MaxStridedLoads / 2) + return StridedLoads; + } + } + return StridedLoads; + }; + + int StridedLoads = countStridedLoads(L, SE); + DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads + << " strided loads\n"); + // Pick the largest power of 2 unroll count that won't result in too many + // strided loads. + if (StridedLoads) { + UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads); + DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount + << '\n'); + } +} + +void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { // Enable partial unrolling and runtime unrolling. - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try @@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; + + if (ST->getProcFamily() == AArch64Subtarget::Falkor && + EnableFalkorHWPFUnrollFix) + getFalkorUnrollingPreferences(L, SE, UP); } Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 290a1ca1f24b..31c037354925 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -51,6 +51,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + /// \name Scalar TTI Implementations /// @{ @@ -119,7 +122,8 @@ public: int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 3d075018904c..475f91016840 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -541,14 +541,13 @@ public: return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32); } - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; }; -void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCValue &Target, - bool &IsResolved) { +bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: @@ -562,11 +561,24 @@ void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm, // section isn't 0x1000-aligned, we therefore need to delegate this decision // to the linker -- a relocation! if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) - IsResolved = false; + return true; + return false; } } +namespace { +class COFFAArch64AsmBackend : public AArch64AsmBackend { +public: + COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple) + : AArch64AsmBackend(T, /*IsLittleEndian*/true) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createAArch64WinCOFFObjectWriter(OS); + } +}; +} + MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, @@ -575,7 +587,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, if (TheTriple.isOSBinFormatMachO()) return new DarwinAArch64AsmBackend(T, MRI); - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); + if (TheTriple.isOSBinFormatCOFF()) + return new COFFAArch64AsmBackend(T, TheTriple); + + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); bool IsILP32 = Options.getABIName() == "ilp32"; return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index f7dda92fb551..89c3e5b4c76e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, /*HasRelocationAddend*/ true), IsILP32(IsILP32) {} -#define R_CLS(rtype) \ - IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype -#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\ - "supported (LP64 eqv: " #lp64rtype ")" +#define R_CLS(rtype) \ + IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype +#define BAD_ILP32_MOV(lp64rtype) \ + "ILP32 absolute MOV relocation not " \ + "supported (LP64 eqv: " #lp64rtype ")" // assumes IsILP32 is true static bool isNonILP32reloc(const MCFixup &Fixup, @@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup, MCContext &Ctx) { if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw) return false; - switch(RefKind) { - case AArch64MCExpr::VK_ABS_G3: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); - return true; - case AArch64MCExpr::VK_ABS_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); - return true; - case AArch64MCExpr::VK_ABS_G1_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); - return true; - case AArch64MCExpr::VK_ABS_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); - return true; - case AArch64MCExpr::VK_DTPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); - return true; - case AArch64MCExpr::VK_DTPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_TPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); - return true; - case AArch64MCExpr::VK_TPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G1: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G0_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); - return true; - default: return false; + switch (RefKind) { + case AArch64MCExpr::VK_ABS_G3: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); + return true; + case AArch64MCExpr::VK_ABS_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); + return true; + case AArch64MCExpr::VK_ABS_G1_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); + return true; + case AArch64MCExpr::VK_ABS_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); + return true; + case AArch64MCExpr::VK_DTPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); + return true; + case AArch64MCExpr::VK_DTPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_TPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); + return true; + case AArch64MCExpr::VK_TPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G1: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G0_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); + return true; + default: + return false; } return false; } @@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(PREL32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data " + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte PC relative data " "relocation not supported (LP64 eqv: PREL64)"); return ELF::R_AARCH64_NONE; } else @@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } } else { if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx)) - return ELF::R_AARCH64_NONE; + return ELF::R_AARCH64_NONE; switch ((unsigned)Fixup.getKind()) { case FK_Data_1: Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); @@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(ABS32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data " - "relocation not supported (LP64 eqv: ABS64)"); + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte absolute data " + "relocation not supported (LP64 eqv: ABS64)"); return ELF::R_AARCH64_NONE; } else return ELF::R_AARCH64_ABS64; @@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte unchecked GOT load/store relocation " - "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); + "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); return ELF::R_AARCH64_NONE; } } @@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { Ctx.reportError(Fixup.getLoc(), "ILP32 4 byte checked GOT load/store relocation " - "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); + "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte checked GOT load/store relocation " - "not supported (unchecked/ILP32 eqv: " - "LD32_GOT_LO12_NC)"); + "not supported (unchecked/ILP32 eqv: " + "LD32_GOT_LO12_NC)"); } return ELF::R_AARCH64_NONE; } @@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC; } else { - Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store " + Ctx.reportError(Fixup.getLoc(), + "LP64 32-bit load/store " "relocation not supported (ILP32 eqv: " "TLSIE_LD32_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; @@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte TLSDESC load/store relocation " - "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); + "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } Ctx.reportError(Fixup.getLoc(), "invalid fixup for 32-bit load/store instruction " - "fixup_aarch64_ldst_imm12_scale4"); + "fixup_aarch64_ldst_imm12_scale4"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale8: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) @@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_LD64_GOT_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "LD64_GOT_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "LD64_GOT_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSIE_LD64_GOTTPREL_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "TLSIE_LD64_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -340,8 +345,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSDESC_LD64_LO12; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSDESC_LD64_LO12)"); + "relocation not supported (LP64 eqv: " + "TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 031aa8b81e35..a0de3c39562b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetStreamer.h" +#include "AArch64WinCOFFStreamer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -30,6 +31,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSBinFormatELF()) return new AArch64TargetELFStreamer(S); + if (TT.isOSBinFormatCOFF()) + return new AArch64TargetWinCOFFStreamer(S); return nullptr; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index 0f5b765c7697..4293dcba955e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -16,53 +16,47 @@ namespace llvm { namespace AArch64 { enum Fixups { - // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. + // A 21-bit pc-relative immediate inserted into an ADR instruction. fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, - // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. + // A 21-bit pc-relative immediate inserted into an ADRP instruction. fixup_aarch64_pcrel_adrp_imm21, - // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. + // 12-bit fixup for add/sub instructions. No alignment adjustment. All value + // bits are encoded. fixup_aarch64_add_imm12, - // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. + // unsigned 12-bit fixups for load and store instructions. fixup_aarch64_ldst_imm12_scale1, fixup_aarch64_ldst_imm12_scale2, fixup_aarch64_ldst_imm12_scale4, fixup_aarch64_ldst_imm12_scale8, fixup_aarch64_ldst_imm12_scale16, - // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by - // pc-relative loads and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and + // generates relocations directly when necessary. fixup_aarch64_ldr_pcrel_imm19, // FIXME: comment fixup_aarch64_movw, - // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. + // The high 14 bits of a 21-bit pc-relative immediate. fixup_aarch64_pcrel_branch14, - // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by - // b.cc and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is use by b.cc and generates + // relocations directly when necessary. fixup_aarch64_pcrel_branch19, - // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. + // The high 26 bits of a 28-bit pc-relative immediate. fixup_aarch64_pcrel_branch26, - // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. + // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from + // branch26 only on ELF. fixup_aarch64_pcrel_call26, - // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. + // zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation. fixup_aarch64_tlsdesc_call, // Marker diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 1b28df963b40..fc808ee0cdd6 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -100,3 +100,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { HasIdentDirective = true; } + +AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { + CommentString = ";"; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 253cd30f26ee..2d7107a37244 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H +#include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" @@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF { explicit AArch64MCAsmInfoELF(const Triple &T); }; +struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF { + explicit AArch64MCAsmInfoCOFF(); +}; + } // namespace llvm #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f710065d9bc7..a2555496cdb9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -14,6 +14,7 @@ #include "AArch64MCTargetDesc.h" #include "AArch64ELFStreamer.h" #include "AArch64MCAsmInfo.h" +#include "AArch64WinCOFFStreamer.h" #include "InstPrinter/AArch64InstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) MAI = new AArch64MCAsmInfoDarwin(); + else if (TheTriple.isOSBinFormatCOFF()) + MAI = new AArch64MCAsmInfoCOFF(); else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); MAI = new AArch64MCAsmInfoELF(TheTriple); } @@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, CodeModel::Model &CM) { - assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); + assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() || + TT.isOSBinFormatCOFF()) && "Invalid target"); if (CM == CodeModel::Default) CM = CodeModel::Small; @@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, /*LabelSections*/ true); } +static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + IncrementalLinkerCompatible); +} + static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) { return new MCInstrAnalysis(Info); } @@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() { // Register the obj streamers. TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer); // Register the obj target streamer. TargetRegistry::RegisterObjectTargetStreamer( diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 615d7dab2c51..1404926b8124 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType, uint32_t CPUSubtype); +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS); + MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp new file mode 100644 index 000000000000..7862a03e771c --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -0,0 +1,65 @@ +//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; + +namespace { + +class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + AArch64WinCOFFObjectWriter() + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) { + } + + ~AArch64WinCOFFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, + const MCAsmBackend &MAB) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +} // end anonymous namespace + +unsigned +AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection, + const MCAsmBackend &MAB) const { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); +} + +bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return true; +} + +namespace llvm { + +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) { + MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter(); + return createWinCOFFObjectWriter(MOTW, OS); +} + +} // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp new file mode 100644 index 000000000000..6c8da27e398f --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -0,0 +1,37 @@ +//===-- AArch64WinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64WinCOFFStreamer.h" + +using namespace llvm; + +namespace { + +class AArch64WinCOFFStreamer : public MCWinCOFFStreamer { +public: + friend class AArch64TargetWinCOFFStreamer; + + AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} +}; +} // end anonymous namespace + +namespace llvm { +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); + return S; +} + +} // end llvm namespace diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h new file mode 100644 index 000000000000..1b4fcd6804e2 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -0,0 +1,43 @@ +//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements WinCOFF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H +#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H + +#include "AArch64TargetStreamer.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +namespace { +class AArch64WinCOFFStreamer; + +class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { +private: + AArch64WinCOFFStreamer &getStreamer(); + +public: + AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S) + : AArch64TargetStreamer(S) {} +}; + +} // end anonymous namespace + +namespace llvm { + +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible); +} // end llvm namespace + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 6d8be5e63fbb..56eeba8a1d4b 100644 --- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc AArch64MCTargetDesc.cpp AArch64MachObjectWriter.cpp AArch64TargetStreamer.cpp + AArch64WinCOFFObjectWriter.cpp + AArch64WinCOFFStreamer.cpp ) add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) |
