diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-07-01 13:22:02 +0000 |
commit | 9df3605dea17e84f8183581f6103bd0c79e2a606 (patch) | |
tree | 70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Target | |
parent | 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff) | |
download | src-9df3605dea17e84f8183581f6103bd0c79e2a606.tar.gz src-9df3605dea17e84f8183581f6103bd0c79e2a606.zip |
Notes
Diffstat (limited to 'lib/Target')
132 files changed, 5152 insertions, 1599 deletions
diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index f27bc97ec3f3..0a948812ff33 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -22,7 +22,7 @@ /// cbz w8, .LBB1_2 -> b.eq .LBB1_2 /// /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. -/// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2 +/// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2 /// //===----------------------------------------------------------------------===// @@ -129,11 +129,11 @@ MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { break; case AArch64::TBZW: case AArch64::TBZX: - CC = AArch64CC::GE; + CC = AArch64CC::PL; break; case AArch64::TBNZW: case AArch64::TBNZX: - CC = AArch64CC::LT; + CC = AArch64CC::MI; break; } return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) @@ -271,6 +271,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, } break; } + (void)NewCmp; (void)NewBr; assert(NewCmp && NewBr && "Expected new instructions."); DEBUG(dbgs() << " with instruction:\n "); diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 00a0111f2bd2..9eda56c825a9 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -139,6 +140,7 @@ class SSACCmpConv { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + const MachineBranchProbabilityInfo *MBPI; public: /// The first block containing a conditional branch, dominating everything @@ -186,8 +188,10 @@ private: public: /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { + void runOnMachineFunction(MachineFunction &MF, + const MachineBranchProbabilityInfo *MBPI) { this->MF = &MF; + this->MBPI = MBPI; TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -564,8 +568,40 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) { // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB, true); - CmpBB->removeSuccessor(Tail, true); + + // Save successor probabilties before removing CmpBB and Tail from their + // parents. + BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB); + BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail); + + Head->removeSuccessor(CmpBB); + CmpBB->removeSuccessor(Tail); + + // If Head and CmpBB had successor probabilties, udpate the probabilities to + // reflect the ccmp-conversion. + if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) { + + // Head is allowed two successors. We've removed CmpBB, so the remaining + // successor is Tail. We need to increase the successor probability for + // Tail to account for the CmpBB path we removed. + // + // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB). + assert(*Head->succ_begin() == Tail && "Head successor is not Tail"); + BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail); + Head->setSuccProbability(Head->succ_begin(), + Head2Tail + Head2CmpBB * CmpBB2Tail); + + // We will transfer successors of CmpBB to Head in a moment without + // normalizing the successor probabilities. Set the successor probabilites + // before doing so. + // + // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB). + for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) { + BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I); + CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I); + } + } + Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->removeBranch(*Head); @@ -717,6 +753,7 @@ int SSACCmpConv::expectedCodeSizeDelta() const { namespace { class AArch64ConditionalCompares : public MachineFunctionPass { + const MachineBranchProbabilityInfo *MBPI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -753,6 +790,7 @@ char AArch64ConditionalCompares::ID = 0; INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp", @@ -763,6 +801,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() { } void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -892,12 +931,13 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; - CmpConv.runOnMachineFunction(MF); + CmpConv.runOnMachineFunction(MF, MBPI); // Visit blocks in dominator tree pre-order. The pre-order enables multiple // cmp-conversions from the same head block. diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2965106fd270..aaf32a499bc3 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7561,8 +7561,9 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType()); - + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SVI->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SVI].push_back(SubVec); } } diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index ad24612239fa..6cb723d187af 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -735,7 +735,7 @@ def : ShiftAlias<"rorv", RORVWr, GPR32>; def : ShiftAlias<"rorv", RORVXr, GPR64>; // Multiply-add -let AddedComplexity = 7 in { +let AddedComplexity = 5 in { defm MADD : MulAccum<0, "madd", add>; defm MSUB : MulAccum<1, "msub", sub>; @@ -752,7 +752,7 @@ def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 +} // AddedComplexity = 5 let AddedComplexity = 5 in { def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 9bfd570e9a82..07ce0e863c5e 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -947,7 +947,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); if (DstRB.getID() != SrcRB.getID()) { - DEBUG(dbgs() << "G_TRUNC input/output on different banks\n"); + DEBUG(dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); return false; } @@ -964,16 +964,21 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { - DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); return false; } if (DstRC == SrcRC) { // Nothing to be done + } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && + SrcTy == LLT::scalar(64)) { + llvm_unreachable("TableGen can import this case"); + return false; } else if (DstRC == &AArch64::GPR32RegClass && SrcRC == &AArch64::GPR64RegClass) { I.getOperand(1).setSubReg(AArch64::sub_32); } else { + DEBUG(dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); return false; } diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 01196817f311..4b568f3fba2b 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -39,6 +39,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); + for (auto Ty : {p0, s1, s8, s16, s32, s64}) + setAction({G_IMPLICIT_DEF, Ty}, Legal); + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) { // These operations naturally get the right answer when used on // GPR32, even if the actual type is narrower. @@ -99,6 +102,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { // G_INSERT (It seems entirely reasonable that inputs shouldn't overlap). } + for (auto Ty : {s1, s8, s16, s32, s64, p0}) + setAction({G_EXTRACT, Ty}, Legal); + + for (auto Ty : {s32, s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, s64, p0, v2s32}) setAction({MemOp, Ty}, Legal); diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 45083df7ab45..f82b9dbc2c9f 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -151,13 +151,24 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, return MCOperand::createExpr(Expr); } +MCOperand AArch64MCInstLower::lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const { + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + return MCOperand::createExpr(Expr); +} + MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { if (Printer.TM.getTargetTriple().isOSDarwin()) return lowerSymbolOperandDarwin(MO, Sym); + if (Printer.TM.getTargetTriple().isOSBinFormatCOFF()) + return lowerSymbolOperandCOFF(MO, Sym); - assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && - "Expect Darwin or ELF target"); + assert(Printer.TM.getTargetTriple().isOSBinFormatELF() && "Invalid target"); return lowerSymbolOperandELF(MO, Sym); } diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h index 1e29b80c2d62..aa30fe1fa707 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.h +++ b/lib/Target/AArch64/AArch64MCInstLower.h @@ -42,6 +42,8 @@ public: MCSymbol *Sym) const; MCOperand lowerSymbolOperandELF(const MachineOperand &MO, MCSymbol *Sym) const; + MCOperand lowerSymbolOperandCOFF(const MachineOperand &MO, + MCSymbol *Sym) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index baf15ac540cf..fab92e139dd0 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -94,7 +94,7 @@ const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { if (TT.isOSDarwin()) return CSR_AArch64_TLS_Darwin_RegMask; - assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS"); + assert(TT.isOSBinFormatELF() && "Invalid target"); return CSR_AArch64_TLS_ELF_RegMask; } diff --git a/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 3654eeca530a..10df50bcf156 100644 --- a/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -1,4 +1,4 @@ -//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 Scheduling ---*- tablegen -*-=// +//=- AArch64SchedThunderX2T99.td - Cavium ThunderX T99 ---*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -79,75 +79,207 @@ def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>; // 60 entry unified scheduler. def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2, - THX2T99P3, THX2T99P4, THX2T99P5]> { - let BufferSize=60; + THX2T99P3, THX2T99P4, THX2T99P5]> { + let BufferSize = 60; } // Define commonly used write types for InstRW specializations. // All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>. // 3 cycles on I1. -def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; } +def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// 1 cycles on I2. +def THX2T99Write_1Cyc_I2 : SchedWriteRes<[THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on I1. -def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; } +def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// 23 cycles on I1. +def THX2T99Write_23Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; +} + +// 39 cycles on I1. +def THX2T99Write_39Cyc_I1 : SchedWriteRes<[THX2T99I1]> { + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; +} // 1 cycle on I0, I1, or I2. -def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; } +def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// 2 cycles on I0, I1, or I2. +def THX2T99Write_2Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 2; +} + +// 4 cycles on I0, I1, or I2. +def THX2T99Write_4Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} + +// 5 cycles on I0, I1, or I2. +def THX2T99Write_5Cyc_I012 : SchedWriteRes<[THX2T99I012]> { + let Latency = 2; + let NumMicroOps = 3; +} // 5 cycles on F1. -def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; } +def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 5; + let NumMicroOps = 2; +} // 7 cycles on F1. -def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; } +def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { + let Latency = 7; + let NumMicroOps = 2; +} // 4 cycles on F0 or F1. -def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; } +def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} // 5 cycles on F0 or F1. -def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; } +def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} // 6 cycles on F0 or F1. -def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; } +def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let NumMicroOps = 3; +} // 7 cycles on F0 or F1. -def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; } +def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} // 8 cycles on F0 or F1. -def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; } +def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 8; + let NumMicroOps = 3; +} + +// 10 cycles on F0 or F1. +def THX2T99Write_10Cyc_F01 : SchedWriteRes<[THX2T99F01]> { + let Latency = 10; + let NumMicroOps = 3; +} // 16 cycles on F0 or F1. def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 16; + let NumMicroOps = 3; let ResourceCycles = [8]; } // 23 cycles on F0 or F1. def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 23; + let NumMicroOps = 3; let ResourceCycles = [11]; } // 1 cycles on LS0 or LS1. -def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; } +def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 0; +} + +// 1 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 2; +} + +// 1 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_1Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 0; + let NumMicroOps = 3; +} + +// 2 cycles on LS0 or LS1. +def THX2T99Write_2Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 1; + let NumMicroOps = 2; +} // 4 cycles on LS0 or LS1. -def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; } +def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} // 5 cycles on LS0 or LS1. -def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; } +def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 5; + let NumMicroOps = 3; +} // 6 cycles on LS0 or LS1. -def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; } +def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { + let Latency = 6; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} + +// 4 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_4Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 4; + let NumMicroOps = 3; +} // 5 cycles on LS0 or LS1 and I0, I1, or I2. def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 5 cycles on LS0 or LS1 and 2 of I0, I1, or I2. -def THX2T99Write_6Cyc_LS01_I012_I012 : +def THX2T99Write_5Cyc_LS01_I012_I012 : + SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +// 6 cycles on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 6; + let NumMicroOps = 4; +} + +// 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2. +def THX2T99Write_6Cyc_LS01_I012_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> { let Latency = 6; let NumMicroOps = 3; @@ -162,25 +294,25 @@ def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { // 5 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 5; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 6 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 6; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 7 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 7; - let NumMicroOps = 2; + let NumMicroOps = 3; } // 8 cycles on LS0 or LS1 and F0 or F1. def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> { let Latency = 8; - let NumMicroOps = 2; + let NumMicroOps = 3; } // Define commonly used read types. @@ -195,10 +327,8 @@ def : ReadAdvance<ReadID, 0>; def : ReadAdvance<ReadExtrHi, 0>; def : ReadAdvance<ReadAdrBase, 0>; def : ReadAdvance<ReadVLD, 0>; - } - //===----------------------------------------------------------------------===// // 3. Instruction Tables. @@ -211,88 +341,217 @@ let SchedModel = ThunderX2T99Model in { // Branch, immed // Branch and link, immed // Compare and branch -def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; } +def : WriteRes<WriteBr, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Branch, register +// Branch and link, register != LR +// Branch and link, register = LR +def : WriteRes<WriteBrReg, [THX2T99I2]> { + let Latency = 1; + let NumMicroOps = 2; +} def : WriteRes<WriteSys, []> { let Latency = 1; } def : WriteRes<WriteBarrier, []> { let Latency = 1; } def : WriteRes<WriteHint, []> { let Latency = 1; } -def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } +def : WriteRes<WriteAtomic, []> { + let Unsupported = 1; + let NumMicroOps = 2; +} -// Branch, register -// Branch and link, register != LR -// Branch and link, register = LR -def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; } +//--- +// Branch +//--- +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs B, BL, BR, BLR)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instrs RET)>; +def : InstRW<[THX2T99Write_1Cyc_I2], (instregex "^B.*")>; +def : InstRW<[THX2T99Write_1Cyc_I2], + (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>; //--- // 3.2 Arithmetic and Logical Instructions // 3.3 Move and Shift Instructions //--- + // ALU, basic // Conditional compare // Conditional select // Address generation -def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteI, [THX2T99I012]> { + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; +} + +def : InstRW<[WriteI], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : InstRW<[WriteI], (instrs COPY)>; // ALU, extend and/or shift def : WriteRes<WriteISReg, [THX2T99I012]> { let Latency = 2; - let ResourceCycles = [2]; + let ResourceCycles = [2, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteISReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + def : WriteRes<WriteIEReg, [THX2T99I012]> { - let Latency = 2; - let ResourceCycles = [2]; + let Latency = 1; + let ResourceCycles = [1, 3]; + let NumMicroOps = 2; } +def : InstRW<[WriteIEReg], + (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?", + "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", + "ADC?(W|X)r(i|r|s|x)", "ADCS?(W|X)r(i|r|s|x)", + "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)", + "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", + "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)", + "SUBS?(W|X)r(i|r|s|x)", "SBC?(W|X)r(i|r|s|x)", + "SBCS?(W|X)r(i|r|s|x)", "CCMN?(W|X)r(i|r|s|x)", + "CCMP?(W|X)r(i|r|s|x)", "CSEL?(W|X)r(i|r|s|x)", + "CSINC?(W|X)r(i|r|s|x)", "CSINV?(W|X)r(i|r|s|x)", + "CSNEG?(W|X)r(i|r|s|x)")>; + // Move immed -def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteImm, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; + +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>; // Variable shift -def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteIS, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} //--- // 3.4 Divide and Multiply Instructions //--- // Divide, W-form -// Latency range of 13-23. Take the average. +// Latency range of 13-23/13-39. def : WriteRes<WriteID32, [THX2T99I1]> { - let Latency = 18; - let ResourceCycles = [18]; + let Latency = 39; + let ResourceCycles = [13, 39]; + let NumMicroOps = 4; } // Divide, X-form -// Latency range of 13-39. Take the average. def : WriteRes<WriteID64, [THX2T99I1]> { - let Latency = 26; - let ResourceCycles = [26]; + let Latency = 23; + let ResourceCycles = [13, 23]; + let NumMicroOps = 4; } // Multiply accumulate, W-form -def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; } +def : WriteRes<WriteIM32, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} // Multiply accumulate, X-form -def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; } +def : WriteRes<WriteIM64, [THX2T99I012]> { + let Latency = 5; + let NumMicroOps = 3; +} + +//def : InstRW<[WriteIM32, ReadIM, ReadIM, ReadIMA, THX2T99Write_5Cyc_I012], +// (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>; +def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>; +def : InstRW<[THX2T99Write_5Cyc_I012], + (instregex "(S|U)(MADDL|MSUBL)rrr")>; + +def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>; // Bitfield extract, two reg -def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteExtr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Multiply high +def : InstRW<[THX2T99Write_4Cyc_I1], (instrs SMULHrr, UMULHrr)>; + +// Miscellaneous Data-Processing Instructions +// Bitfield extract +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs EXTRWrri, EXTRXrri)>; + +// Bitifield move - basic +def : InstRW<[THX2T99Write_1Cyc_I012], + (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>; -// Bitfield move, basic // Bitfield move, insert -// NOTE: Handled by WriteIS. +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "^BFM")>; +def : InstRW<[THX2T99Write_1Cyc_I012], (instregex "(S|U)?BFM.*")>; // Count leading def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", - "^CLZ(W|X)r$")>; + "^CLZ(W|X)r$")>; + +// Reverse bits +def : InstRW<[THX2T99Write_1Cyc_I012], (instrs RBITWr, RBITXr)>; + +// Cryptography Extensions +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES[DE]")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AESI?MC")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1(H|SU1)")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA1[CMP]")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256SU0")>; +def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA256(H|H2|SU1)")>; + +// CRC Instructions +// def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32", "^CRC32C")>; +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32Brr, CRC32Hrr, CRC32Wrr, CRC32Xrr)>; + +def : InstRW<[THX2T99Write_4Cyc_I1], + (instrs CRC32CBrr, CRC32CHrr, CRC32CWrr, CRC32CXrr)>; // Reverse bits/bytes // NOTE: Handled by WriteI. //--- -// 3.6 Load Instructions +// 3.6 Load Instructions // 3.10 FP Load Instructions //--- @@ -300,13 +559,29 @@ def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$", // Load register, unscaled immed // Load register, immed unprivileged // Load register, unsigned immed -def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; } +def : WriteRes<WriteLD, [THX2T99LS01]> { + let Latency = 4; + let NumMicroOps = 4; +} // Load register, immed post-index // NOTE: Handled by WriteLD, WriteI. // Load register, immed pre-index // NOTE: Handled by WriteLD, WriteAdr. -def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; } +def : WriteRes<WriteAdr, [THX2T99I012]> { + let Latency = 1; + let NumMicroOps = 2; +} + +// Load pair, immed offset, normal +// Load pair, immed offset, signed words, base != SP +// Load pair, immed offset signed words, base = SP +// LDP only breaks into *one* LS micro-op. Thus +// the resources are handled by WriteLD. +def : WriteRes<WriteLDHi, []> { + let Latency = 5; + let NumMicroOps = 5; +} // Load register offset, basic // Load register, register offset, scale by 4/8 @@ -324,23 +599,229 @@ def THX2T99ReadAdrBase : SchedReadVariant<[ SchedVar<NoSchedPred, [ReadDefault]>]>; def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>; -// Load pair, immed offset, normal -// Load pair, immed offset, signed words, base != SP -// Load pair, immed offset signed words, base = SP -// LDP only breaks into *one* LS micro-op. Thus -// the resources are handling by WriteLD. -def : WriteRes<WriteLDHi, []> { - let Latency = 5; -} - // Load pair, immed pre-index, normal // Load pair, immed pre-index, signed words // Load pair, immed post-index, normal // Load pair, immed post-index, signed words // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDNPXi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPDi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPQi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPSWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPWi)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi], (instrs LDPXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRBui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDui)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRHui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRQui)>; +def : InstRW<[THX2T99Write_5Cyc_LS01], (instrs LDRSui)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRDl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRQl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRWl)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDRXl)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRXi)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDTRSWi)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSBXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHWpost)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRSHXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRBBpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpre)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, WriteAdr], (instrs LDRHHpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRBpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRDpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRHpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRQpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRSpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRWpre)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteAdr], (instrs LDRXpre)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteLDHi, WriteAdr], + (instrs LDPXpost)>; + +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRBpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRDpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRHpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRQpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRSpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRWpost)>; +def : InstRW<[THX2T99Write_5Cyc_LS01_I012_I012, WriteI], (instrs LDRXpost)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroW)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012, ReadAdrBase], (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroW)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRBroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRDroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRHHroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRQroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRSHXroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRWroX)>; +def : InstRW<[THX2T99Write_4Cyc_LS01_I012_I012, ReadAdrBase], + (instrs LDRXroX)>; + +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURBBi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURDi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURHHi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURQi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSBXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHWi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSHXi)>; +def : InstRW<[THX2T99Write_4Cyc_LS01], (instrs LDURSWi)>; + +//--- +// Prefetch +//--- +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMl)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFUMi)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMui)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroW)>; +def : InstRW<[THX2T99Write_6Cyc_LS01_I012], (instrs PRFMroX)>; + //-- -// 3.7 Store Instructions +// 3.7 Store Instructions // 3.11 FP Store Instructions //-- @@ -382,6 +863,195 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> { // Store pair, immed pre-index, X-form // NOTE: Handled by WriteAdr, WriteSTP. +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURBBi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURHHi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURSi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURWi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STURXi)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRBi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRHi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRWi)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01], (instrs STTRXi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STNPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPDi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPQi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPXi)>; +def : InstRW<[THX2T99Write_1Cyc_LS01], (instrs STPWi)>; + +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRBui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRDui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRHui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRQui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRXui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012_I012], (instrs STRWui)>; +def : InstRW<[THX2T99Write_1Cyc_LS01_I012], (instrs STRWui)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPDpre, STPDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPQpre, STPQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPSpre, STPSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPWpre, STPWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STPXpre, STPXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STPXpre, STPXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBpre, STRBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBpre, STRBBpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDpre, STRDpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHpre, STRHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHpre, STRHHpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQpre, STRQpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSpre, STRSpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWpre, STRWpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012], + (instrs STRXpre, STRXpost)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXpre, STRXpost)>; + +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBroW, STRBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRBBroW, STRBBroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRDroW, STRDroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHroW, STRHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRHHroW, STRHHroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRQroW, STRQroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRSroW, STRSroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRWroW, STRWroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; +def : InstRW<[WriteAdr, THX2T99Write_1Cyc_LS01_I012_I012, ReadAdrBase], + (instrs STRXroW, STRXroX)>; + //--- // 3.8 FP Data Processing Instructions //--- @@ -389,28 +1059,95 @@ def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> { // FP absolute value // FP min/max // FP negate -def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; } +def : WriteRes<WriteF, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} // FP arithmetic def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>; // FP compare -def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; } +def : WriteRes<WriteFCmp, [THX2T99F01]> { + let Latency = 5; + let NumMicroOps = 2; +} -// FP divide, S-form -// FP square root, S-form -def : WriteRes<WriteFDiv, [THX2T99F01]> { +// FP Mul, Div, Sqrt +def : WriteRes<WriteFDiv, [THX2T99F01]> { + let Latency = 22; + let ResourceCycles = [19]; +} + +def THX2T99XWriteFDiv : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFDivSP : SchedWriteRes<[THX2T99F01]> { let Latency = 16; let ResourceCycles = [8]; + let NumMicroOps = 4; } +def THX2T99XWriteFDivDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtSP : SchedWriteRes<[THX2T99F01]> { + let Latency = 16; + let ResourceCycles = [8]; + let NumMicroOps = 4; +} + +def THX2T99XWriteFSqrtDP : SchedWriteRes<[THX2T99F01]> { + let Latency = 23; + let ResourceCycles = [12]; + let NumMicroOps = 4; +} + +// FP divide, S-form +// FP square root, S-form +def : InstRW<[THX2T99XWriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instrs FSQRTSr)>; +def : InstRW<[THX2T99XWriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[THX2T99XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "^FDIVSrr", "^FSQRTSrr")>; + // FP divide, D-form // FP square root, D-form -def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instrs FSQRTDr)>; +def : InstRW<[THX2T99XWriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[THX2T99XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "^FDIVDrr", "^FSQRTDrr")>; // FP multiply // FP multiply accumulate -def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; } +def : WriteRes<WriteFMul, [THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMul : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def THX2T99XWriteFMulAcc : SchedWriteRes<[THX2T99F01]> { + let Latency = 6; + let ResourceCycles = [2]; + let NumMicroOps = 3; +} + +def : InstRW<[THX2T99XWriteFMul], (instregex "^FMUL", "^FNMUL")>; +def : InstRW<[THX2T99XWriteFMulAcc], + (instregex "^FMADD", "^FMSUB", "^FNMADD", "^FNMSUB")>; // FP round to integral def : InstRW<[THX2T99Write_7Cyc_F01], @@ -426,15 +1163,25 @@ def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>; // FP convert, from vec to vec reg // FP convert, from gen to vec reg // FP convert, from vec to gen reg -def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; } +def : WriteRes<WriteFCvt, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 3; +} // FP move, immed // FP move, register -def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; } +def : WriteRes<WriteFImm, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} // FP transfer, from gen to vec reg // FP transfer, from vec to gen reg -def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; } +def : WriteRes<WriteFCopy, [THX2T99F01]> { + let Latency = 4; + let NumMicroOps = 2; +} + def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; //--- @@ -470,19 +1217,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>; // ASIMD shift by register, basic, Q-form // ASIMD shift by register, complex, D-form // ASIMD shift by register, complex, Q-form -def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; } +def : WriteRes<WriteV, [THX2T99F01]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [4, 23]; +} // ASIMD arith, reduce, 4H/4S // ASIMD arith, reduce, 8B/8H // ASIMD arith, reduce, 16B -def : InstRW<[THX2T99Write_5Cyc_F01], - (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD logical (MOV, MVN, ORN, ORR) -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^ANDv", "^BICv", "^EORv", "^MOVv", "^MVNv", + "^ORRv", "^ORNv", "^NOTv")>; +// ASIMD arith, reduce +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>; // ASIMD polynomial (8x8) multiply long -def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^(S|U|SQD)MULL")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>; +def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^PMULL(v8i8|v16i8)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^PMULL(v1i64|v2i64)")>; + +// ASIMD absolute diff accum, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +// ASIMD absolute diff accum, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +// ASIMD absolute diff accum long +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]ABAL")>; +// ASIMD arith, reduce, 4H/4S +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; +// ASIMD arith, reduce, 8B +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>; +// ASIMD arith, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU]?ADDL?Vv16i8v$")>; +// ASIMD max/min, reduce, 4H/4S +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>; +// ASIMD max/min, reduce, 8B/8H +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>; +// ASIMD max/min, reduce, 16B/16H +def : InstRW<[THX2T99Write_10Cyc_F01], + (instregex "^[SU](MIN|MAX)Vv16i8v$")>; +// ASIMD multiply, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)" # + "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" # + "(_indexed)?$")>; +// ASIMD multiply, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD multiply accumulate, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +// ASIMD multiply accumulate, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +// ASIMD shift accumulate +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// ASIMD shift by immed, basic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "RSHRNv","SHRNv", "SQRSHRNv","SQRSHRUNv", + "SQSHRNv","SQSHRUNv", "UQRSHRNv", + "UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; +// ASIMD shift by immed, complex +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^[SU]?(Q|R){1,2}SHR")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SQSHLU")>; +// ASIMD shift by register, basic, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// ASIMD shift by register, complex, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL" # + "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>; +// ASIMD shift by register, complex, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD Arithmetic +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "(RADD|RSUB)HNv.*")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", + "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" # + "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADALP","^UADALP")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLPv","^UADDLPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SADDLV","^UADDLV")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDVv","^SMAXVv","^UMAXVv","^SMINVv","^UMINVv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABAv","^UABAv","^SABALv","^UABALv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQADDv","^SQSUBv","^UQADDv","^UQSUBv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^SUQADDv","^USQADDv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^ADDHNv","^RADDHNv", "^RSUBHNv", + "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", + "^SRHADD", "^SUBHNv", "^SUQADD", + "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^CMEQv","^CMGEv","^CMGTv", + "^CMLEv","^CMLTv", "^CMHIv","^CMHSv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SMAXv","^SMINv","^UMAXv","^UMINv", + "^SMAXPv","^SMINPv","^UMAXPv","^UMINPv")>; +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SABDv","^UABDv", "^SABDLv","^UABDLv")>; //--- // 3.13 ASIMD Floating-point Instructions @@ -493,7 +1356,8 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>; // ASIMD FP arith, normal, D-form // ASIMD FP arith, normal, Q-form -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FABDv", "^FADDv", "^FSUBv")>; // ASIMD FP arith,pairwise, D-form // ASIMD FP arith, pairwise, Q-form @@ -503,8 +1367,15 @@ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>; // ASIMD FP compare, Q-form def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>; def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", - "^FCMGTv", "^FCMLEv", - "^FCMLTv")>; + "^FCMGTv", "^FCMLEv", + "^FCMLTv")>; + +// ASIMD FP round, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v2f32)")>; +// ASIMD FP round, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>; // ASIMD FP convert, long // ASIMD FP convert, narrow @@ -512,14 +1383,26 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv", // ASIMD FP convert, other, Q-form // NOTE: Handled by WriteV. +// ASIMD FP convert, long and narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^FCVT(L|N|XN)v")>; +// ASIMD FP convert, other, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>; +// ASIMD FP convert, other, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>; + // ASIMD FP divide, D-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv2f32")>; // ASIMD FP divide, Q-form, F32 def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>; +def : InstRW<[THX2T99Write_16Cyc_F01], (instregex "FDIVv4f32")>; // ASIMD FP divide, Q-form, F64 def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>; +def : InstRW<[THX2T99Write_23Cyc_F01], (instregex "FDIVv2f64")>; // ASIMD FP max/min, normal, D-form // ASIMD FP max/min, normal, Q-form @@ -540,20 +1423,24 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv", // ASIMD FP multiply, Q-form, FZ // ASIMD FP multiply, Q-form, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP multiply accumulate, Dform, FZ // ASIMD FP multiply accumulate, Dform, no FZ // ASIMD FP multiply accumulate, Qform, FZ // ASIMD FP multiply accumulate, Qform, no FZ def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>; +def : InstRW<[THX2T99Write_6Cyc_F01], + (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>; // ASIMD FP negate def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>; -// ASIMD FP round, D-form -// ASIMD FP round, Q-form -// NOTE: Handled by WriteV. - //-- // 3.14 ASIMD Miscellaneous Instructions //-- @@ -563,37 +1450,66 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>; // ASIMD bitwise insert, D-form // ASIMD bitwise insert, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^BIFv", "^BITv", "^BSLv")>; // ASIMD count, D-form // ASIMD count, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^CLSv", "^CLZv", "^CNTv")>; // ASIMD duplicate, gen reg // ASIMD duplicate, element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CPY")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv.+gpr")>; // ASIMD extract def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>; // ASIMD extract narrow +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^XTNv")>; + // ASIMD extract narrow, saturating -// NOTE: Handled by WriteV. +def : InstRW<[THX2T99Write_7Cyc_F01], + (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>; // ASIMD insert, element to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; +// ASIMD transfer, element to gen reg +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; + // ASIMD move, integer immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>; // ASIMD move, FP immed def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>; +// ASIMD table lookup, D-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v8i8Four")>; + +// ASIMD table lookup, Q-form +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8One")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Two")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Three")>; +def : InstRW<[THX2T99Write_7Cyc_F01], (instregex "^TB[LX]v16i8Four")>; + +// ASIMD transpose +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1", "^TRN2")>; + +// ASIMD unzip/zip +def : InstRW<[THX2T99Write_5Cyc_F01], + (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>; + // ASIMD reciprocal estimate, D-form // ASIMD reciprocal estimate, Q-form -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", - "^FRSQRTEv", "^URSQRTEv")>; + "^FRSQRTEv", "^URSQRTEv")>; // ASIMD reciprocal step, D-form, FZ // ASIMD reciprocal step, D-form, no FZ @@ -602,7 +1518,7 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>; // ASIMD reverse -def : InstRW<[THX2T99Write_5Cyc_F01], +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^REV16v", "^REV32v", "^REV64v")>; // ASIMD table lookup, D-form @@ -610,135 +1526,135 @@ def : InstRW<[THX2T99Write_5Cyc_F01], def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>; // ASIMD transfer, element to word or word -def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>; +def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^[SU]MOVv")>; // ASIMD transfer, element to gen reg -def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>; +def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "(S|U)MOVv.*")>; // ASIMD transfer gen reg to element def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>; // ASIMD transpose def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v", - "^UZP1v", "^UZP2v")>; + "^UZP1v", "^UZP2v")>; // ASIMD unzip/zip def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>; //-- -// 3.15 ASIMD Load Instructions +// 3.15 ASIMD Load Instructions //-- // ASIMD load, 1 element, multiple, 1 reg, D-form // ASIMD load, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form // ASIMD load, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_4Cyc_LS01], +def : InstRW<[THX2T99Write_4Cyc_LS01], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form // ASIMD load, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01], +def : InstRW<[THX2T99Write_5Cyc_LS01], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form // ASIMD load, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_6Cyc_LS01], +def : InstRW<[THX2T99Write_6Cyc_LS01], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D // ASIMD load, 1 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D // ASIMD load, 2 element, all lanes, Q-form -def : InstRW<[THX2T99Write_5Cyc_LS01_F01], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lone, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_7Cyc_LS01_F01], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_8Cyc_LS01_F01], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr], (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[THX2T99Write_6Cyc_LS01_F01], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; //-- @@ -747,106 +1663,83 @@ def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr], // ASIMD store, 1 element, multiple, 1 reg, D-form // ASIMD store, 1 element, multiple, 1 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form // ASIMD store, 1 element, multiple, 2 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form // ASIMD store, 1 element, multiple, 3 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form // ASIMD store, 1 element, multiple, 4 reg, Q-form -def : InstRW<[THX2T99Write_1Cyc_LS01], +def : InstRW<[THX2T99Write_1Cyc_LS01], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr], (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST1i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST2i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, B/H/S // ASIMD store, 4 element, multiple, Q-form, D -def : InstRW<[THX2T99Write_1Cyc_LS01_F01], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H // ASIMD store, 4 element, one lane, S // ASIMD store, 4 element, one lane, D def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>; -def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], +def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; -//-- -// 3.17 Cryptography Extensions -//-- - -// Crypto AES ops -def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>; - -// Crypto polynomial (64x64) multiply long -def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>; - -// Crypto SHA1 xor ops -// Crypto SHA1 schedule acceleration ops -// Crypto SHA256 schedule acceleration op (1 u-op) -// Crypto SHA256 schedule acceleration op (2 u-ops) -// Crypto SHA256 hash acceleration ops -def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>; - -//-- -// 3.18 CRC -//-- - -// CRC checksum ops -def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>; - } // SchedModel = ThunderX2T99Model + diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 6660f0babb8a..1252f9403812 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -167,6 +167,8 @@ extern "C" void LLVMInitializeAArch64Target() { static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) return llvm::make_unique<AArch64_MachoTargetObjectFile>(); + if (TT.isOSBinFormatCOFF()) + return llvm::make_unique<AArch64_COFFTargetObjectFile>(); return llvm::make_unique<AArch64_ELFTargetObjectFile>(); } @@ -179,6 +181,8 @@ static std::string computeDataLayout(const Triple &TT, return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; if (TT.isOSBinFormatMachO()) return "e-m:o-i64:64-i128:128-n32:64-S128"; + if (TT.isOSBinFormatCOFF()) + return "e-m:w-i64:64-i128:128-n32:64-S128"; if (LittleEndian) return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 2c75a3258c1c..fefa7e26b79f 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -36,6 +36,7 @@ public: ~AArch64TargetMachine() override; const AArch64Subtarget *getSubtargetImpl(const Function &F) const override; + const AArch64Subtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 47e3bce43f6e..9077eb7902fd 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -45,6 +45,9 @@ public: const TargetMachine &TM) const override; }; +/// This implementation is used for AArch64 COFF targets. +class AArch64_COFFTargetObjectFile : public TargetLoweringObjectFileCOFF {}; + } // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4328682b93c..a76f080530bb 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -20,6 +20,23 @@ using namespace llvm; #define DEBUG_TYPE "aarch64tti" +static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", + cl::init(true), cl::Hidden); + +bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // Inline a callee if its target-features are a subset of the callers + // target-features. + return (CallerBits & CalleeBits) == CalleeBits; +} + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. @@ -631,10 +648,62 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { return ST->getMaxInterleaveFactor(); } -void AArch64TTIImpl::getUnrollingPreferences(Loop *L, +// For Falkor, we want to avoid having too many strided loads in a loop since +// that can exhaust the HW prefetcher resources. We adjust the unroller +// MaxCount preference below to attempt to ensure unrolling doesn't create too +// many strided loads. +static void +getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TargetTransformInfo::UnrollingPreferences &UP) { + enum { MaxStridedLoads = 7 }; + auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) { + int StridedLoads = 0; + // FIXME? We could make this more precise by looking at the CFG and + // e.g. not counting loads in each side of an if-then-else diamond. + for (const auto BB : L->blocks()) { + for (auto &I : *BB) { + LoadInst *LMemI = dyn_cast<LoadInst>(&I); + if (!LMemI) + continue; + + Value *PtrValue = LMemI->getPointerOperand(); + if (L->isLoopInvariant(PtrValue)) + continue; + + const SCEV *LSCEV = SE.getSCEV(PtrValue); + const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); + if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) + continue; + + // FIXME? We could take pairing of unrolled load copies into account + // by looking at the AddRec, but we would probably have to limit this + // to loops with no stores or other memory optimization barriers. + ++StridedLoads; + // We've seen enough strided loads that seeing more won't make a + // difference. + if (StridedLoads > MaxStridedLoads / 2) + return StridedLoads; + } + } + return StridedLoads; + }; + + int StridedLoads = countStridedLoads(L, SE); + DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads + << " strided loads\n"); + // Pick the largest power of 2 unroll count that won't result in too many + // strided loads. + if (StridedLoads) { + UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads); + DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount + << '\n'); + } +} + +void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { // Enable partial unrolling and runtime unrolling. - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try @@ -644,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, // Disable partial & runtime unrolling on -Os. UP.PartialOptSizeThreshold = 0; + + if (ST->getProcFamily() == AArch64Subtarget::Falkor && + EnableFalkorHWPFUnrollFix) + getFalkorUnrollingPreferences(L, SE, UP); } Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 290a1ca1f24b..31c037354925 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -51,6 +51,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + /// \name Scalar TTI Implementations /// @{ @@ -119,7 +122,8 @@ public: int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 3d075018904c..475f91016840 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -541,14 +541,13 @@ public: return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32); } - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; }; -void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCValue &Target, - bool &IsResolved) { +bool ELFAArch64AsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { // The ADRP instruction adds some multiple of 0x1000 to the current PC & // ~0xfff. This means that the required offset to reach a symbol can vary by // up to one step depending on where the ADRP is in memory. For example: @@ -562,11 +561,24 @@ void ELFAArch64AsmBackend::processFixupValue(const MCAssembler &Asm, // section isn't 0x1000-aligned, we therefore need to delegate this decision // to the linker -- a relocation! if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) - IsResolved = false; + return true; + return false; } } +namespace { +class COFFAArch64AsmBackend : public AArch64AsmBackend { +public: + COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple) + : AArch64AsmBackend(T, /*IsLittleEndian*/true) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createAArch64WinCOFFObjectWriter(OS); + } +}; +} + MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, @@ -575,7 +587,11 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, if (TheTriple.isOSBinFormatMachO()) return new DarwinAArch64AsmBackend(T, MRI); - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); + if (TheTriple.isOSBinFormatCOFF()) + return new COFFAArch64AsmBackend(T, TheTriple); + + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); bool IsILP32 = Options.getABIName() == "ilp32"; return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index f7dda92fb551..89c3e5b4c76e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -49,10 +49,11 @@ AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, /*HasRelocationAddend*/ true), IsILP32(IsILP32) {} -#define R_CLS(rtype) \ - IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype -#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\ - "supported (LP64 eqv: " #lp64rtype ")" +#define R_CLS(rtype) \ + IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype +#define BAD_ILP32_MOV(lp64rtype) \ + "ILP32 absolute MOV relocation not " \ + "supported (LP64 eqv: " #lp64rtype ")" // assumes IsILP32 is true static bool isNonILP32reloc(const MCFixup &Fixup, @@ -60,44 +61,45 @@ static bool isNonILP32reloc(const MCFixup &Fixup, MCContext &Ctx) { if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw) return false; - switch(RefKind) { - case AArch64MCExpr::VK_ABS_G3: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); - return true; - case AArch64MCExpr::VK_ABS_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); - return true; - case AArch64MCExpr::VK_ABS_G2_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); - return true; - case AArch64MCExpr::VK_ABS_G1_S: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); - return true; - case AArch64MCExpr::VK_ABS_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); - return true; - case AArch64MCExpr::VK_DTPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); - return true; - case AArch64MCExpr::VK_DTPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_TPREL_G2: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); - return true; - case AArch64MCExpr::VK_TPREL_G1_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G1: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); - return true; - case AArch64MCExpr::VK_GOTTPREL_G0_NC: - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); - return true; - default: return false; + switch (RefKind) { + case AArch64MCExpr::VK_ABS_G3: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3)); + return true; + case AArch64MCExpr::VK_ABS_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); + return true; + case AArch64MCExpr::VK_ABS_G2_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); + return true; + case AArch64MCExpr::VK_ABS_G1_S: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); + return true; + case AArch64MCExpr::VK_ABS_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); + return true; + case AArch64MCExpr::VK_DTPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); + return true; + case AArch64MCExpr::VK_DTPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_TPREL_G2: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); + return true; + case AArch64MCExpr::VK_TPREL_G1_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G1: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); + return true; + case AArch64MCExpr::VK_GOTTPREL_G0_NC: + Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); + return true; + default: + return false; } return false; } @@ -130,7 +132,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(PREL32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data " + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte PC relative data " "relocation not supported (LP64 eqv: PREL64)"); return ELF::R_AARCH64_NONE; } else @@ -178,7 +181,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } } else { if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx)) - return ELF::R_AARCH64_NONE; + return ELF::R_AARCH64_NONE; switch ((unsigned)Fixup.getKind()) { case FK_Data_1: Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported"); @@ -189,8 +192,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(ABS32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data " - "relocation not supported (LP64 eqv: ABS64)"); + Ctx.reportError(Fixup.getLoc(), + "ILP32 8 byte absolute data " + "relocation not supported (LP64 eqv: ABS64)"); return ELF::R_AARCH64_NONE; } else return ELF::R_AARCH64_ABS64; @@ -262,7 +266,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte unchecked GOT load/store relocation " - "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); + "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); return ELF::R_AARCH64_NONE; } } @@ -270,12 +274,12 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { Ctx.reportError(Fixup.getLoc(), "ILP32 4 byte checked GOT load/store relocation " - "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); + "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte checked GOT load/store relocation " - "not supported (unchecked/ILP32 eqv: " - "LD32_GOT_LO12_NC)"); + "not supported (unchecked/ILP32 eqv: " + "LD32_GOT_LO12_NC)"); } return ELF::R_AARCH64_NONE; } @@ -283,7 +287,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (IsILP32) { return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC; } else { - Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store " + Ctx.reportError(Fixup.getLoc(), + "LP64 32-bit load/store " "relocation not supported (ILP32 eqv: " "TLSIE_LD32_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; @@ -295,14 +300,14 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, } else { Ctx.reportError(Fixup.getLoc(), "LP64 4 byte TLSDESC load/store relocation " - "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); + "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } Ctx.reportError(Fixup.getLoc(), "invalid fixup for 32-bit load/store instruction " - "fixup_aarch64_ldst_imm12_scale4"); + "fixup_aarch64_ldst_imm12_scale4"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale8: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) @@ -312,8 +317,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_LD64_GOT_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "LD64_GOT_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "LD64_GOT_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -330,8 +335,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSIE_LD64_GOTTPREL_LO12_NC)"); + "relocation not supported (LP64 eqv: " + "TLSIE_LD64_GOTTPREL_LO12_NC)"); return ELF::R_AARCH64_NONE; } } @@ -340,8 +345,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_AARCH64_TLSDESC_LD64_LO12; } else { Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " - "relocation not supported (LP64 eqv: " - "TLSDESC_LD64_LO12)"); + "relocation not supported (LP64 eqv: " + "TLSDESC_LD64_LO12)"); return ELF::R_AARCH64_NONE; } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 031aa8b81e35..a0de3c39562b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetStreamer.h" +#include "AArch64WinCOFFStreamer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" @@ -30,6 +31,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -210,6 +212,8 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); if (TT.isOSBinFormatELF()) return new AArch64TargetELFStreamer(S); + if (TT.isOSBinFormatCOFF()) + return new AArch64TargetWinCOFFStreamer(S); return nullptr; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index 0f5b765c7697..4293dcba955e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -16,53 +16,47 @@ namespace llvm { namespace AArch64 { enum Fixups { - // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. + // A 21-bit pc-relative immediate inserted into an ADR instruction. fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, - // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. + // A 21-bit pc-relative immediate inserted into an ADRP instruction. fixup_aarch64_pcrel_adrp_imm21, - // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. + // 12-bit fixup for add/sub instructions. No alignment adjustment. All value + // bits are encoded. fixup_aarch64_add_imm12, - // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. + // unsigned 12-bit fixups for load and store instructions. fixup_aarch64_ldst_imm12_scale1, fixup_aarch64_ldst_imm12_scale2, fixup_aarch64_ldst_imm12_scale4, fixup_aarch64_ldst_imm12_scale8, fixup_aarch64_ldst_imm12_scale16, - // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is used by - // pc-relative loads and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is used by pc-relative loads and + // generates relocations directly when necessary. fixup_aarch64_ldr_pcrel_imm19, // FIXME: comment fixup_aarch64_movw, - // fixup_aarch64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. + // The high 14 bits of a 21-bit pc-relative immediate. fixup_aarch64_pcrel_branch14, - // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_aarch64_pcrel_adrhi, except this is use by - // b.cc and generates relocations directly when necessary. + // The high 19 bits of a 21-bit pc-relative immediate. Same encoding as + // fixup_aarch64_pcrel_adrhi, except this is use by b.cc and generates + // relocations directly when necessary. fixup_aarch64_pcrel_branch19, - // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. + // The high 26 bits of a 28-bit pc-relative immediate. fixup_aarch64_pcrel_branch26, - // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. + // The high 26 bits of a 28-bit pc-relative immediate. Distinguished from + // branch26 only on ELF. fixup_aarch64_pcrel_call26, - // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. + // zero-space placeholder for the ELF R_AARCH64_TLSDESC_CALL relocation. fixup_aarch64_tlsdesc_call, // Marker diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 1b28df963b40..fc808ee0cdd6 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -100,3 +100,7 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { HasIdentDirective = true; } + +AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { + CommentString = ";"; +} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 253cd30f26ee..2d7107a37244 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64MCASMINFO_H +#include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" #include "llvm/MC/MCAsmInfoELF.h" @@ -33,6 +34,10 @@ struct AArch64MCAsmInfoELF : public MCAsmInfoELF { explicit AArch64MCAsmInfoELF(const Triple &T); }; +struct AArch64MCAsmInfoCOFF : public MCAsmInfoCOFF { + explicit AArch64MCAsmInfoCOFF(); +}; + } // namespace llvm #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index f710065d9bc7..a2555496cdb9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -14,6 +14,7 @@ #include "AArch64MCTargetDesc.h" #include "AArch64ELFStreamer.h" #include "AArch64MCAsmInfo.h" +#include "AArch64WinCOFFStreamer.h" #include "InstPrinter/AArch64InstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -59,8 +60,10 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) MAI = new AArch64MCAsmInfoDarwin(); + else if (TheTriple.isOSBinFormatCOFF()) + MAI = new AArch64MCAsmInfoCOFF(); else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + assert(TheTriple.isOSBinFormatELF() && "Invalid target"); MAI = new AArch64MCAsmInfoELF(TheTriple); } @@ -74,8 +77,8 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM, CodeModel::Model &CM) { - assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); + assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() || + TT.isOSBinFormatCOFF()) && "Invalid target"); if (CM == CodeModel::Default) CM = CodeModel::Small; @@ -122,6 +125,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, /*LabelSections*/ true); } +static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + IncrementalLinkerCompatible); +} + static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) { return new MCInstrAnalysis(Info); } @@ -154,6 +165,7 @@ extern "C" void LLVMInitializeAArch64TargetMC() { // Register the obj streamers. TargetRegistry::RegisterELFStreamer(*T, createELFStreamer); TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createWinCOFFStreamer); // Register the obj target streamer. TargetRegistry::RegisterObjectTargetStreamer( diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 615d7dab2c51..1404926b8124 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -60,6 +60,8 @@ MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType, uint32_t CPUSubtype); +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS); + MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp new file mode 100644 index 000000000000..7862a03e771c --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -0,0 +1,65 @@ +//= AArch64WinCOFFObjectWriter.cpp - AArch64 Windows COFF Object Writer C++ =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> + +using namespace llvm; + +namespace { + +class AArch64WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + AArch64WinCOFFObjectWriter() + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARM64) { + } + + ~AArch64WinCOFFObjectWriter() override = default; + + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, + const MCAsmBackend &MAB) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +} // end anonymous namespace + +unsigned +AArch64WinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection, + const MCAsmBackend &MAB) const { + const MCFixupKindInfo &Info = MAB.getFixupKindInfo(Fixup.getKind()); + report_fatal_error(Twine("unsupported relocation type: ") + Info.Name); +} + +bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return true; +} + +namespace llvm { + +MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) { + MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter(); + return createWinCOFFObjectWriter(MOTW, OS); +} + +} // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp new file mode 100644 index 000000000000..6c8da27e398f --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -0,0 +1,37 @@ +//===-- AArch64WinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64WinCOFFStreamer.h" + +using namespace llvm; + +namespace { + +class AArch64WinCOFFStreamer : public MCWinCOFFStreamer { +public: + friend class AArch64TargetWinCOFFStreamer; + + AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) {} +}; +} // end anonymous namespace + +namespace llvm { +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); + return S; +} + +} // end llvm namespace diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h new file mode 100644 index 000000000000..1b4fcd6804e2 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -0,0 +1,43 @@ +//===-- AArch64WinCOFFStreamer.h - WinCOFF Streamer for AArch64 -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements WinCOFF streamer information for the AArch64 backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H +#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64WINCOFFSTREAMER_H + +#include "AArch64TargetStreamer.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +namespace { +class AArch64WinCOFFStreamer; + +class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { +private: + AArch64WinCOFFStreamer &getStreamer(); + +public: + AArch64TargetWinCOFFStreamer(llvm::MCStreamer &S) + : AArch64TargetStreamer(S) {} +}; + +} // end anonymous namespace + +namespace llvm { + +MCWinCOFFStreamer +*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible); +} // end llvm namespace + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 6d8be5e63fbb..56eeba8a1d4b 100644 --- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -8,6 +8,8 @@ add_llvm_library(LLVMAArch64Desc AArch64MCTargetDesc.cpp AArch64MachObjectWriter.cpp AArch64TargetStreamer.cpp + AArch64WinCOFFObjectWriter.cpp + AArch64WinCOFFStreamer.cpp ) add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 55d18c3f3646..5a799b2d88d0 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -36,7 +36,6 @@ FunctionPass *createR600ControlFlowFinalizer(); FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes -FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 7494e5decd6f..f1d899c4d003 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav", "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension" >; -def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc", - "HasSDWAClampVOPC", +def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc", + "HasSDWAOutModsVOPC", "true", "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension" >; @@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP + FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP ] >; diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 2071b6f157cd..9a391d06c9ea 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -1776,7 +1776,7 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, E = EndMBB->succ_end(); PI != E; ++PI) { // Either we have a back-edge to the entry block, or a back-edge to the - // succesor of the entry block since the block may be split. + // successor of the entry block since the block may be split. if ((*PI) != StartMBB && !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { Succs.insert( @@ -1831,7 +1831,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( IfBB->addSuccessor(CodeBBStart); DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); - // Ensure that the MergeBB is a succesor of the CodeEndBB. + // Ensure that the MergeBB is a successor of the CodeEndBB. if (!CodeBBEnd->isSuccessor(MergeBB)) CodeBBEnd->addSuccessor(MergeBB); diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index ab5abf2039a5..be47b900c6f0 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasSDWAScalar(false), HasSDWASdst(false), HasSDWAMac(false), - HasSDWAClampVOPC(false), + HasSDWAOutModsVOPC(false), HasDPP(false), FlatAddressSpace(false), FlatInstOffsets(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 2b16289c723e..22cede59086a 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -153,7 +153,7 @@ protected: bool HasSDWAScalar; bool HasSDWASdst; bool HasSDWAMac; - bool HasSDWAClampVOPC; + bool HasSDWAOutModsVOPC; bool HasDPP; bool FlatAddressSpace; bool FlatInstOffsets; @@ -452,8 +452,8 @@ public: return HasSDWAMac; } - bool hasSDWAClampVOPC() const { - return HasSDWAClampVOPC; + bool hasSDWAOutModsVOPC() const { + return HasSDWAOutModsVOPC; } /// \brief Returns the offset in bytes from the start of the input buffer diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 04fe9f689806..425fd35d47de 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -720,7 +720,6 @@ bool GCNPassConfig::addPreISel() { addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions } addPass(createSinkingPass()); - addPass(createSITypeRewriter()); addPass(createAMDGPUAnnotateUniformValues()); if (!LateCFGStructurize) { addPass(createSIAnnotateControlFlowPass()); diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 88245b01683a..89a03902dc69 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -63,7 +63,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, return false; } -void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, +void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Threshold = 300; // Twice the default. UP.MaxCount = UINT_MAX; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 485e20411ab4..9a320bdfcc3d 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -68,7 +68,8 @@ public: bool hasBranchDivergence() { return true; } - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index e30844f082cd..917d9cfa6905 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -96,7 +96,6 @@ add_llvm_target(AMDGPUCodeGen SIPeepholeSDWA.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp - SITypeRewriter.cpp SIWholeQuadMode.cpp GCNIterativeScheduler.cpp GCNMinRegStrategy.cpp diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 04308fb3aaf6..f26e49295e69 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, using namespace AMDGPU::SDWA; if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) { - if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + // XXX: static_cast<int> is needed to avoid stupid warning: + // compare with unsigned is always true + if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) && Val <= SDWA9EncValues::SRC_VGPR_MAX) { return createRegOperand(getVgprClassId(Width), Val - SDWA9EncValues::SRC_VGPR_MIN); diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d0f4e00994de..d39b345bdf03 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4314,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBitConstantOp( return SDValue(); } +// Returns true if argument is a boolean value which is not serialized into +// memory or argument and does not require v_cmdmask_b32 to be deserialized. +static bool isBoolSGPR(SDValue V) { + if (V.getValueType() != MVT::i1) + return false; + switch (V.getOpcode()) { + default: break; + case ISD::SETCC: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case AMDGPUISD::FP_CLASS: + return true; + } + return false; +} + SDValue SITargetLowering::performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.isBeforeLegalize()) @@ -4402,6 +4419,16 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, } } + if (VT == MVT::i32 && + (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) { + // and x, (sext cc from i1) => select cc, x, 0 + if (RHS.getOpcode() != ISD::SIGN_EXTEND) + std::swap(LHS, RHS); + if (isBoolSGPR(RHS.getOperand(0))) + return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0), + LHS, DAG.getConstant(0, SDLoc(N), MVT::i32)); + } + return SDValue(); } @@ -4941,8 +4968,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: { auto Cond = RHS.getOperand(0); - if (Cond.getOpcode() != ISD::SETCC && - Cond.getOpcode() != AMDGPUISD::FP_CLASS) + if (!isBoolSGPR(Cond)) break; SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1); SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond }; @@ -5109,6 +5135,35 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); EVT VT = LHS.getValueType(); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + auto CRHS = dyn_cast<ConstantSDNode>(RHS); + if (!CRHS) { + CRHS = dyn_cast<ConstantSDNode>(LHS); + if (CRHS) { + std::swap(LHS, RHS); + CC = getSetCCSwappedOperands(CC); + } + } + + if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND && + isBoolSGPR(LHS.getOperand(0))) { + // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1 + // setcc (sext from i1 cc), -1, eq|sle|uge) => cc + // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1 + // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc + if ((CRHS->isAllOnesValue() && + (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) || + (CRHS->isNullValue() && + (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE))) + return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0), + DAG.getConstant(-1, SL, MVT::i1)); + if ((CRHS->isAllOnesValue() && + (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) || + (CRHS->isNullValue() && + (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT))) + return LHS.getOperand(0); + } if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() && VT != MVT::f16)) @@ -5116,7 +5171,6 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, // Match isinf pattern // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity)) - ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) { const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); if (!CRHS) diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index c9b48fea7225..b6784ec14e9f 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -770,7 +770,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. - Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine); + Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; @@ -871,7 +871,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. - Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine); + Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead); } return; @@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); - if ( DstIdx == -1) - DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst); const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx }; @@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI"; return false; } - } else if (!ST.hasSDWAClampVOPC()) { + } else if (!ST.hasSDWAOutModsVOPC()) { // No clamp allowed on GFX9 for VOPC const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp); - if (Clamp != nullptr && - (!Clamp->isImm() || Clamp->getImm() != 0)) { + if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) { ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI"; return false; } + + // No omod allowed on GFX9 for VOPC + const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) { + ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI"; + return false; + } } } } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 3b4a8b5d1e81..4a81fb3b463a 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -336,6 +336,10 @@ def NegSubInlineConst16 : ImmLeaf<i16, [{ return Imm < -16 && Imm >= -64; }], NegateImm>; +def ShiftAmt32Imm : PatLeaf <(imm), [{ + return N->getZExtValue() < 32; +}]>; + //===----------------------------------------------------------------------===// // Custom Operands //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 3b4bdc864253..bcc685015cf5 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -929,6 +929,14 @@ def : UMad24Pat<V_MAD_U32_U24>; defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>; def : ROTRPattern <V_ALIGNBIT_B32>; +def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), + (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), + (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; + +def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), + (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), + (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; + /********** ====================== **********/ /********** Indirect addressing **********/ /********** ====================== **********/ diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 4ac23ef03cb3..e2ac6631d2f3 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, return false; } - if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp)) + if (!ST.hasSDWAOutModsVOPC() && + (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) || + TII->hasModifiersSet(MI, AMDGPU::OpName::omod))) return false; - } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { + } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) || + !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { return false; } @@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); - if (!Dst && !TII->isVOPC(MI)) - return false; - const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { assert(Dst && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); SDWAInst.add(*Dst); + } else { + assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + SDWAInst.addReg(AMDGPU::VCC, RegState::Define); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy omod if present, initialize otherwise if needed - MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); - if (OMod) { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1); - SDWAInst.add(*OMod); - } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { - SDWAInst.addImm(0); + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { + MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); + if (OMod) { + SDWAInst.add(*OMod); + } else { + SDWAInst.addImm(0); + } } - // Initialize dst_sel and dst_unused if present - if (Dst) { - assert( - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1); + // Initialize dst_sel if present + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); + } + + // Initialize dst_unused if present + if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); } diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp deleted file mode 100644 index aad68537f779..000000000000 --- a/lib/Target/AMDGPU/SITypeRewriter.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass removes performs the following type substitution on all -/// non-compute shaders: -/// -/// v16i8 => i128 -/// - v16i8 is used for constant memory resource descriptors. This type is -/// legal for some compute APIs, and we don't want to declare it as legal -/// in the backend, because we want the legalizer to expand all v16i8 -/// operations. -/// v1* => * -/// - Having v1* types complicates the legalizer and we can easily replace -/// - them with the element type. -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstVisitor.h" - -using namespace llvm; - -namespace { - -class SITypeRewriter : public FunctionPass, - public InstVisitor<SITypeRewriter> { - - static char ID; - Module *Mod; - Type *v16i8; - Type *v4i32; - -public: - SITypeRewriter() : FunctionPass(ID) { } - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - StringRef getPassName() const override { return "SI Type Rewriter"; } - void visitLoadInst(LoadInst &I); - void visitCallInst(CallInst &I); - void visitBitCast(BitCastInst &I); -}; - -} // End anonymous namespace - -char SITypeRewriter::ID = 0; - -bool SITypeRewriter::doInitialization(Module &M) { - Mod = &M; - v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16); - v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4); - return false; -} - -bool SITypeRewriter::runOnFunction(Function &F) { - if (!AMDGPU::isShader(F.getCallingConv())) - return false; - - visit(F); - visit(F); - - return false; -} - -void SITypeRewriter::visitLoadInst(LoadInst &I) { - Value *Ptr = I.getPointerOperand(); - Type *PtrTy = Ptr->getType(); - Type *ElemTy = PtrTy->getPointerElementType(); - IRBuilder<> Builder(&I); - if (ElemTy == v16i8) { - Value *BitCast = Builder.CreateBitCast(Ptr, - PointerType::get(v4i32,PtrTy->getPointerAddressSpace())); - LoadInst *Load = Builder.CreateLoad(BitCast); - SmallVector<std::pair<unsigned, MDNode *>, 8> MD; - I.getAllMetadataOtherThanDebugLoc(MD); - for (unsigned i = 0, e = MD.size(); i != e; ++i) { - Load->setMetadata(MD[i].first, MD[i].second); - } - Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType()); - I.replaceAllUsesWith(BitCastLoad); - I.eraseFromParent(); - } -} - -void SITypeRewriter::visitCallInst(CallInst &I) { - IRBuilder<> Builder(&I); - - SmallVector <Value*, 8> Args; - SmallVector <Type*, 8> Types; - bool NeedToReplace = false; - Function *F = I.getCalledFunction(); - if (!F) - return; - - std::string Name = F->getName(); - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Value *Arg = I.getArgOperand(i); - if (Arg->getType() == v16i8) { - Args.push_back(Builder.CreateBitCast(Arg, v4i32)); - Types.push_back(v4i32); - NeedToReplace = true; - Name = Name + ".v4i32"; - } else if (Arg->getType()->isVectorTy() && - Arg->getType()->getVectorNumElements() == 1 && - Arg->getType()->getVectorElementType() == - Type::getInt32Ty(I.getContext())){ - Type *ElementTy = Arg->getType()->getVectorElementType(); - std::string TypeName = "i32"; - InsertElementInst *Def = cast<InsertElementInst>(Arg); - Args.push_back(Def->getOperand(1)); - Types.push_back(ElementTy); - std::string VecTypeName = "v1" + TypeName; - Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName); - NeedToReplace = true; - } else { - Args.push_back(Arg); - Types.push_back(Arg->getType()); - } - } - - if (!NeedToReplace) { - return; - } - Function *NewF = Mod->getFunction(Name); - if (!NewF) { - NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod); - NewF->setAttributes(F->getAttributes()); - } - I.replaceAllUsesWith(Builder.CreateCall(NewF, Args)); - I.eraseFromParent(); -} - -void SITypeRewriter::visitBitCast(BitCastInst &I) { - IRBuilder<> Builder(&I); - if (I.getDestTy() != v4i32) { - return; - } - - if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) { - if (Op->getSrcTy() == v4i32) { - I.replaceAllUsesWith(Op->getOperand(0)); - I.eraseFromParent(); - } - } -} - -FunctionPass *llvm::createSITypeRewriter() { - return new SITypeRewriter(); -} diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 6f67183df6a1..c40b4450a5b5 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -222,6 +222,13 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true", "Has return address stack">; +// Some processors have no branch predictor, which changes the expected cost of +// taking a branch which affects the choice of whether to use predicated +// instructions. +def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor", + "HasBranchPredictor", "false", + "Has no branch predictor">; + /// DSP extension. def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Supports DSP instructions in ARM and/or Thumb2">; @@ -262,6 +269,10 @@ def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", "Generate calls via indirect call " "instructions">; +def FeatureExecuteOnly + : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", + "Enable the generation of execute only code.">; + def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", "Reserve R9, making it unavailable as " "GPR">; @@ -540,7 +551,7 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; // // Dummy CPU, used to target architectures -def : ProcNoItin<"generic", []>; +def : ProcessorModel<"generic", CortexA8Model, []>; def : ProcNoItin<"arm8", [ARMv4]>; def : ProcNoItin<"arm810", [ARMv4]>; @@ -756,13 +767,19 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>; -def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>; +def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; + +def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, + ProcM3, + FeatureHasNoBranchPredictor]>; -def : ProcNoItin<"cortex-m4", [ARMv7em, +def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, - FeatureD16]>; + FeatureD16, + FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, FeatureFPARMv8, @@ -771,11 +788,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em, def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; -def : ProcNoItin<"cortex-m33", [ARMv8mMainline, +def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, - FeatureVFPOnlySP]>; + FeatureVFPOnlySP, + FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-a32", [ARMv8a, FeatureHWDivThumb, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index e0810c358f2d..1ec6b24b2ed6 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } bool ARMBaseInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &, +isProfitableToIfCvt(MachineBasicBlock &TBB, unsigned TCycles, unsigned TExtra, - MachineBasicBlock &, + MachineBasicBlock &FBB, unsigned FCycles, unsigned FExtra, BranchProbability Probability) const { if (!TCycles) @@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &, // Here we scale up each component of UnpredCost to avoid precision issue when // scaling TCycles/FCycles by Probability. const unsigned ScalingUpFactor = 1024; - unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); - unsigned FUnpredCost = + + unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; + unsigned UnpredCost; + if (!Subtarget.hasBranchPredictor()) { + // When we don't have a branch predictor it's always cheaper to not take a + // branch than take it, so we have to take that into account. + unsigned NotTakenBranchCost = 1; + unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); + unsigned TUnpredCycles, FUnpredCycles; + if (!FCycles) { + // Triangle: TBB is the fallthrough + TUnpredCycles = TCycles + NotTakenBranchCost; + FUnpredCycles = TakenBranchCost; + } else { + // Diamond: TBB is the block that is branched to, FBB is the fallthrough + TUnpredCycles = TCycles + TakenBranchCost; + FUnpredCycles = FCycles + NotTakenBranchCost; + } + // The total cost is the cost of each path scaled by their probabilites + unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor); + UnpredCost = TUnpredCost + FUnpredCost; + // When predicating assume that the first IT can be folded away but later + // ones cost one cycle each + if (Subtarget.isThumb2() && TCycles + FCycles > 4) { + PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; + } + } else { + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = Probability.getCompl().scale(FCycles * ScalingUpFactor); - unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1 * ScalingUpFactor; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + UnpredCost = TUnpredCost + FUnpredCost; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + } - return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; + return PredCost <= UnpredCost; } bool diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2bcc707e9fc3..e42514acd76f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -7580,6 +7580,9 @@ static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { SDValue VHi = DAG.getAnyExtOrTrunc( DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)), dl, MVT::i32); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + if (isBigEndian) + std::swap (VLo, VHi); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); @@ -7607,10 +7610,14 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N, MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); - Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32, - SDValue(CmpSwap, 0))); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1, + SDLoc(N), MVT::i32, SDValue(CmpSwap, 0))); Results.push_back(SDValue(CmpSwap, 2)); } diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 423f97ccacd6..891a8f482f0a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1416,12 +1416,12 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def tTBB_JT : tPseudoInst<(outs), - (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, - Sched<[WriteBr]>; + (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, + IIC_Br, []>, Sched<[WriteBr]>; def tTBH_JT : tPseudoInst<(outs), - (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, - Sched<[WriteBr]>; + (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, + IIC_Br, []>, Sched<[WriteBr]>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 4cb0eca5ee5f..374176d1d737 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -46,6 +46,10 @@ private: MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const; + bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const; + const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; const ARMBaseTargetMachine &TM; @@ -346,6 +350,50 @@ bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, return true; } +bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + auto &MBB = *MIB->getParent(); + auto InsertBefore = std::next(MIB->getIterator()); + auto &DebugLoc = MIB->getDebugLoc(); + + // Compare the condition to 0. + auto CondReg = MIB->getOperand(1).getReg(); + assert(MRI.getType(CondReg).getSizeInBits() == 1 && + RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported types for select operation"); + auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri)) + .addUse(CondReg) + .addImm(0) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Move a value into the result register based on the result of the + // comparison. + auto ResReg = MIB->getOperand(0).getReg(); + auto TrueReg = MIB->getOperand(2).getReg(); + auto FalseReg = MIB->getOperand(3).getReg(); + assert(MRI.getType(ResReg) == MRI.getType(TrueReg) && + MRI.getType(TrueReg) == MRI.getType(FalseReg) && + MRI.getType(FalseReg).getSizeInBits() == 32 && + RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported types for select operation"); + auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr)) + .addDef(ResReg) + .addUse(TrueReg) + .addUse(FalseReg) + .add(predOps(ARMCC::EQ, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -448,6 +496,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } case G_ICMP: return selectICmp(MIB, TII, MRI, TRI, RBI); + case G_SELECT: + return selectSelect(MIB, TII, MRI, TRI, RBI); case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 5873c7fb3872..f3e62d09cc30 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -55,10 +55,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (unsigned Op : {G_SDIV, G_UDIV}) { for (auto Ty : {s8, s16}) - // FIXME: We need WidenScalar here, but in the case of targets with - // software division we'll also need Libcall afterwards. Treat as Custom - // until we have better support for chaining legalization actions. - setAction({Op, Ty}, Custom); + setAction({Op, Ty}, WidenScalar); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Legal); else @@ -84,6 +81,10 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); + setAction({G_SELECT, s32}, Legal); + setAction({G_SELECT, p0}, Legal); + setAction({G_SELECT, 1, s1}, Legal); + setAction({G_CONSTANT, s32}, Legal); setAction({G_ICMP, s1}, Legal); @@ -118,40 +119,6 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, switch (MI.getOpcode()) { default: return false; - case G_SDIV: - case G_UDIV: { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8)) - return false; - - // We need to widen to 32 bits and then maybe, if the target requires, - // transform into a libcall. - LegalizerHelper Helper(MIRBuilder.getMF()); - - MachineInstr *NewMI = nullptr; - Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { - // Store the new, 32-bit div instruction. - if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV) - NewMI = MI; - }); - - auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32)); - Helper.MIRBuilder.stopRecordingInsertions(); - if (Result == LegalizerHelper::UnableToLegalize) { - return false; - } - assert(NewMI && "Couldn't find widened instruction"); - assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) && - "Unexpected widened instruction"); - assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 && - "Unexpected type for the widened instruction"); - - Result = Helper.legalizeInstrStep(*NewMI); - if (Result == LegalizerHelper::UnableToLegalize) { - return false; - } - return true; - } case G_SREM: case G_UREM: { unsigned OriginalResult = MI.getOperand(0).getReg(); diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 2350d0c6ef69..11fb81a4f9fe 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -255,6 +255,18 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; + case G_SELECT: { + LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); + assert(Ty2.getSizeInBits() == 1 && "Unsupported size for G_SELECT"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } case G_ICMP: { LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); (void)Ty2; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 02cbfb1fa9f1..b10583bc7983 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -245,6 +245,10 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { // the general GPR register class above (MOV, e.g.) def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; +// Thumb registers R0-R7 and the PC. Some instructions like TBB or THH allow +// the PC to be used as a destination operand as well. +def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>; + // The high registers in thumb mode, R8-R15. def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 1c7902520f2d..53e012f13ee2 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -424,3 +424,4 @@ include "ARMScheduleA9.td" include "ARMScheduleSwift.td" include "ARMScheduleR52.td" include "ARMScheduleA57.td" +include "ARMScheduleM3.td" diff --git a/lib/Target/ARM/ARMScheduleM3.td b/lib/Target/ARM/ARMScheduleM3.td new file mode 100644 index 000000000000..93f8299f9bd0 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleM3.td @@ -0,0 +1,21 @@ +//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-M3 processor. +// +//===----------------------------------------------------------------------===// + +def CortexM3Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + + let CompleteModel = 0; +} diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index d9d0c27c6304..2c42a1336166 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -11,6 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARM.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "ARMCallLowering.h" +#include "ARMLegalizerInfo.h" +#include "ARMRegisterBankInfo.h" +#endif #include "ARMSubtarget.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" @@ -23,6 +30,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -78,11 +92,6 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -/// EnableExecuteOnly - Enables the generation of execute-only code on supported -/// targets -static cl::opt<bool> -EnableExecuteOnly("arm-execute-only"); - ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, StringRef FS) { ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); @@ -92,13 +101,41 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, return new ARMFrameLowering(STI); } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct ARMGISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(TM.Options), TM(TM), - FrameLowering(initializeFrameLowering(CPU, FS)), + CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), + TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. InstrInfo(isThumb1Only() @@ -106,7 +143,29 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM, *this) {} + TLInfo(TM, *this) { + assert((isThumb() || hasARMOps()) && + "Target must either be thumb or support ARM operations!"); + +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new ARMLegalizerInfo(*this)); + + auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? + GISel->InstSelector.reset(createARMInstructionSelector( + *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI)); + + GISel->RegBankInfo.reset(RBI); +#endif + setGISelAccessor(*GISel); +} const CallLowering *ARMSubtarget::getCallLowering() const { assert(GISel && "Access to GlobalISel APIs not set"); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index d890d0fa777e..e15b17512c96 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -246,6 +246,11 @@ protected: /// avoid issue "normal" call instructions to callees which do not return. bool HasRetAddrStack = false; + /// HasBranchPredictor - True if the subtarget has a branch predictor. Having + /// a branch predictor or not changes the expected cost of taking a branch + /// which affects the choice of whether to use predicated instructions. + bool HasBranchPredictor = true; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension = false; @@ -554,6 +559,7 @@ public: bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRetAddrStack() const { return HasRetAddrStack; } + bool hasBranchPredictor() const { return HasBranchPredictor; } bool hasMPExtension() const { return HasMPExtension; } bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index eb71e557ec91..c323a1d368de 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,11 +11,6 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMCallLowering.h" -#include "ARMLegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "ARMRegisterBankInfo.h" -#endif #include "ARMSubtarget.h" #include "ARMMacroFusion.h" #include "ARMTargetMachine.h" @@ -29,7 +24,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" @@ -110,60 +104,20 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { static ARMBaseTargetMachine::ARMABI computeTargetABI(const Triple &TT, StringRef CPU, const TargetOptions &Options) { - if (Options.MCOptions.getABIName() == "aapcs16") + StringRef ABIName = Options.MCOptions.getABIName(); + + if (ABIName.empty()) + ABIName = ARM::computeDefaultTargetABI(TT, CPU); + + if (ABIName == "aapcs16") return ARMBaseTargetMachine::ARM_ABI_AAPCS16; - else if (Options.MCOptions.getABIName().startswith("aapcs")) + else if (ABIName.startswith("aapcs")) return ARMBaseTargetMachine::ARM_ABI_AAPCS; - else if (Options.MCOptions.getABIName().startswith("apcs")) + else if (ABIName.startswith("apcs")) return ARMBaseTargetMachine::ARM_ABI_APCS; - assert(Options.MCOptions.getABIName().empty() && - "Unknown target-abi option!"); - - ARMBaseTargetMachine::ARMABI TargetABI = - ARMBaseTargetMachine::ARM_ABI_UNKNOWN; - - unsigned ArchKind = ARM::parseCPUArch(CPU); - StringRef ArchName = ARM::getArchName(ArchKind); - // FIXME: This is duplicated code from the front end and should be unified. - if (TT.isOSBinFormatMachO()) { - if (TT.getEnvironment() == Triple::EABI || - (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) || - ARM::parseArchProfile(ArchName) == ARM::PK_M) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else if (TT.isWatchABI()) { - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16; - } else { - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - } - } else if (TT.isOSWindows()) { - // FIXME: this is invalid for WindowsCE - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - } else { - // Select the default based on the platform. - switch (TT.getEnvironment()) { - case Triple::Android: - case Triple::GNUEABI: - case Triple::GNUEABIHF: - case Triple::MuslEABI: - case Triple::MuslEABIHF: - case Triple::EABIHF: - case Triple::EABI: - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - case Triple::GNU: - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - break; - default: - if (TT.isOSNetBSD()) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; - else - TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; - break; - } - } - - return TargetABI; + llvm_unreachable("Unhandled/unknown ABI Name!"); + return ARMBaseTargetMachine::ARM_ABI_UNKNOWN; } static std::string computeDataLayout(const Triple &TT, StringRef CPU, @@ -248,61 +202,39 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, OL), TargetABI(computeTargetABI(TT, CPU, Options)), - TLOF(createTLOF(getTargetTriple())), - Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { + TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { // Default to triple-appropriate float ABI - if (Options.FloatABIType == FloatABI::Default) - this->Options.FloatABIType = - Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) { + if (TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABIHF || + TargetTriple.getEnvironment() == Triple::EABIHF || + TargetTriple.isOSWindows() || + TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) + this->Options.FloatABIType = FloatABI::Hard; + else + this->Options.FloatABIType = FloatABI::Soft; + } // Default to triple-appropriate EABI if (Options.EABIVersion == EABI::Default || Options.EABIVersion == EABI::Unknown) { // musl is compatible with glibc with regard to EABI version - if (Subtarget.isTargetGNUAEABI() || Subtarget.isTargetMuslAEABI()) + if ((TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABI || + TargetTriple.getEnvironment() == Triple::MuslEABIHF) && + !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; } initAsmInfo(); - if (!Subtarget.isThumb() && !Subtarget.hasARMOps()) - report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " - "support ARM mode execution!"); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct ARMGISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -334,24 +266,6 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); - -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); - GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new ARMLegalizerInfo(*I)); - - auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo()); - - // FIXME: At this point, we can't rely on Subtarget having RBI. - // It's awkward to mix passing RBI and the Subtarget; should we pass - // TII/TRI as well? - GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI)); - - GISel->RegBankInfo.reset(RBI); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 2fcee73228fe..f41da3e8e223 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -36,7 +36,6 @@ public: protected: std::unique_ptr<TargetLoweringObjectFile> TLOF; - ARMSubtarget Subtarget; bool isLittle; mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; @@ -47,8 +46,8 @@ public: CodeGenOpt::Level OL, bool isLittle); ~ARMBaseTargetMachine() override; - const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } const ARMSubtarget *getSubtargetImpl(const Function &F) const override; + const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } /// \brief Get the TargetIRAnalysis for this target. diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index a5b27abeb27f..88bab64ffaf2 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -32,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { const ARMBaseTargetMachine &ARM_TM = static_cast<const ARMBaseTargetMachine &>(TM); bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS; - genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); + // genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); @@ -43,16 +43,6 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0); - - // Make code section unreadable when in execute-only mode - if (genExecuteOnly) { - unsigned Type = ELF::SHT_PROGBITS; - unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE; - // Since we cannot modify flags for an existing section, we create a new - // section with the right flags, and use 0 as the unique ID for - // execute-only text - TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U); - } } const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( @@ -74,21 +64,27 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const { getContext()); } -MCSection * -ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +static bool isExecuteOnlyFunction(const GlobalObject *GO, SectionKind SK, + const TargetMachine &TM) { + if (const Function *F = dyn_cast<Function>(GO)) + if (TM.getSubtarget<ARMSubtarget>(*F).genExecuteOnly() && SK.isText()) + return true; + return false; +} + +MCSection *ARMElfTargetObjectFile::getExplicitSectionGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Set execute-only access for the explicit section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM); } -MCSection * -ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO, - SectionKind SK, const TargetMachine &TM) const { +MCSection *ARMElfTargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind SK, const TargetMachine &TM) const { // Place the global in the execute-only text section - if (genExecuteOnly && SK.isText()) + if (isExecuteOnlyFunction(GO, SK, TM)) SK = SectionKind::getExecuteOnly(); return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM); diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index dbb8128269dc..bd7aa1cfe02b 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -16,8 +16,6 @@ namespace llvm { class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { - mutable bool genExecuteOnly = false; - protected: const MCSection *AttributesSection = nullptr; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 585726208a8d..5ab236b7fd4c 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -486,7 +486,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - Size = 0; + Size = 4; return MCDisassembler::Fail; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 81760f03940a..22de728fe06e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -738,13 +738,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, } } -void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) { +bool ARMAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; const unsigned FixupKind = Fixup.getKind() ; - if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + if ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { assert(Sym && "How did we resolve this?"); // If the symbol is external the linker will handle it. @@ -753,7 +753,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // If the symbol is out of range, produce a relocation and hope the // linker can handle it. GNU AS produces an error in this case. if (Sym->isExternal()) - IsResolved = false; + return true; } // Create relocations for unconditional branches to function symbols with // different execution mode in ELF binaries. @@ -761,12 +761,12 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, unsigned Type = dyn_cast<MCSymbolELF>(Sym)->getType(); if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) - IsResolved = false; + return true; if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || FixupKind == ARM::fixup_arm_thumb_bl || FixupKind == ARM::fixup_t2_condbranch || FixupKind == ARM::fixup_t2_uncondbranch)) - IsResolved = false; + return true; } } // We must always generate a relocation for BL/BLX instructions if we have @@ -776,7 +776,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, FixupKind == ARM::fixup_arm_blx || FixupKind == ARM::fixup_arm_uncondbl || FixupKind == ARM::fixup_arm_condbl)) - IsResolved = false; + return true; + return false; } /// getFixupKindNumBytes - The number of bytes the fixup may change. diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 6a0ba2ed41c1..84b54bbb9a49 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -38,10 +38,8 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, uint64_t Value, bool IsPCRel, diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 9f6c5d7bf920..831589ba0581 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -15,55 +15,47 @@ namespace llvm { namespace ARM { enum Fixups { - // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol - // addresses + // 12-bit PC relative relocation for symbol addresses fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind, - // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with - // the 16-bit halfwords reordered. + // Equivalent to fixup_arm_ldst_pcrel_12, with the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, - // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol - // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + // 10-bit PC relative relocation for symbol addresses used in + // LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. fixup_arm_pcrel_10_unscaled, - // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses - // used in VFP instructions where the lower 2 bits are not encoded - // (so it's encoded as an 8-bit immediate). + // 10-bit PC relative relocation for symbol addresses used in VFP instructions + // where the lower 2 bits are not encoded (so it's encoded as an 8-bit + // immediate). fixup_arm_pcrel_10, - // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_10, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_10, - // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses - // used in VFP instructions where bit 0 not encoded (so it's encoded as an - // 8-bit immediate). + // 9-bit PC relative relocation for symbol addresses used in VFP instructions + // where bit 0 not encoded (so it's encoded as an 8-bit immediate). fixup_arm_pcrel_9, - // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for - // the short-swapped encoding of Thumb2 instructions. + // Equivalent to fixup_arm_pcrel_9, accounting for the short-swapped encoding + // of Thumb2 instructions. fixup_t2_pcrel_9, - // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol - // addresses where the lower 2 bits are not encoded (so it's encoded as an - // 8-bit immediate). + // 10-bit PC relative relocation for symbol addresses where the lower 2 bits + // are not encoded (so it's encoded as an 8-bit immediate). fixup_thumb_adr_pcrel_10, - // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_arm_adr_pcrel_12, - // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR - // instruction. + // 12-bit PC relative relocation for the ADR instruction. fixup_t2_adr_pcrel_12, - // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch - // instructions. + // 24-bit PC relative relocation for conditional branch instructions. fixup_arm_condbranch, - // fixup_arm_uncondbranch - 24-bit PC relative relocation for - // branch instructions. (unconditional) + // 24-bit PC relative relocation for branch instructions. (unconditional) fixup_arm_uncondbranch, - // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct - // uconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct uconditional branch + // instructions. fixup_t2_condbranch, - // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct - // branch unconditional branch instructions. + // 20-bit PC relative relocation for Thumb2 direct branch unconditional branch + // instructions. fixup_t2_uncondbranch, - // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. + // 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, // The following fixups handle the ARM BL instructions. These can be @@ -75,42 +67,41 @@ enum Fixups { // MachO does not draw a distinction between the two cases, so it will treat // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. - // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + // Fixup for unconditional ARM BL instructions. fixup_arm_uncondbl, - // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial - // conditionalisation. + // Fixup for ARM BL instructions with nontrivial conditionalisation. fixup_arm_condbl, - // fixup_arm_blx - Fixup for ARM BLX instructions. + // Fixup for ARM BLX instructions. fixup_arm_blx, - // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. + // Fixup for Thumb BL instructions. fixup_arm_thumb_bl, - // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions. + // Fixup for Thumb BLX instructions. fixup_arm_thumb_blx, - // fixup_arm_thumb_cb - Fixup for Thumb branch instructions. + // Fixup for Thumb branch instructions. fixup_arm_thumb_cb, - // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs. + // Fixup for Thumb load/store from constant pool instrs. fixup_arm_thumb_cp, - // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions. + // Fixup for Thumb conditional branching instructions. fixup_arm_thumb_bcc, // The next two are for the movt/movw pair // the 16bit imm field are split into imm{15-12} and imm{11-0} fixup_arm_movt_hi16, // :upper16: fixup_arm_movw_lo16, // :lower16: - fixup_t2_movt_hi16, // :upper16: - fixup_t2_movw_lo16, // :lower16: + fixup_t2_movt_hi16, // :upper16: + fixup_t2_movw_lo16, // :lower16: - // fixup_arm_mod_imm - Fixup for mod_imm + // Fixup for mod_imm fixup_arm_mod_imm, - // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand + // Fixup for Thumb2 8-bit rotated operand fixup_t2_so_imm, // Marker @@ -118,6 +109,6 @@ enum Fixups { NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind }; } -} +} // namespace llvm #endif diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 5c3b45ac2328..d18298385adf 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -230,13 +230,25 @@ void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, namespace llvm { // Prepare value for the target space for it -void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t &Value, +void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, + const MCValue &Target, + uint64_t &Value, MCContext *Ctx) const { // The size of the fixup in bits. uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize; unsigned Kind = Fixup.getKind(); + // Parsed LLVM-generated temporary labels are already + // adjusted for instruction size, but normal labels aren't. + // + // To handle both cases, we simply un-adjust the temporary label + // case so it acts like all other labels. + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isTemporary()) + Value += 2; + } + switch (Kind) { default: llvm_unreachable("unhandled fixup"); @@ -333,9 +345,10 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { MCELFObjectTargetWriter::getOSABI(OSType)); } -void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, - bool IsPCRel, MCContext &Ctx) const { +void AVRAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const { + adjustFixupValue(Fixup, Target, Value, &Asm.getContext()); if (Value == 0) return; // Doesn't change encoding. @@ -349,7 +362,7 @@ void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. @@ -436,30 +449,16 @@ bool AVRAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -void AVRAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { +bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { switch ((unsigned) Fixup.getKind()) { + default: return false; // Fixups which should always be recorded as relocations. case AVR::fixup_7_pcrel: case AVR::fixup_13_pcrel: case AVR::fixup_call: - IsResolved = false; - break; - default: - // Parsed LLVM-generated temporary labels are already - // adjusted for instruction size, but normal labels aren't. - // - // To handle both cases, we simply un-adjust the temporary label - // case so it acts like all other labels. - if (Target.getSymA()->getSymbol().isTemporary()) - Value += 2; - - adjustFixupValue(Fixup, Value, &Asm.getContext()); - break; + return true; } } diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h index f2be2494684a..4a75e3b0d22d 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h +++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h @@ -35,13 +35,14 @@ public: AVRAsmBackend(Triple::OSType OSType) : MCAsmBackend(), OSType(OSType) {} - void adjustFixupValue(const MCFixup &Fixup, uint64_t &Value, - MCContext *Ctx = nullptr) const; + void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, + uint64_t &Value, MCContext *Ctx = nullptr) const; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; @@ -63,10 +64,8 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override; private: Triple::OSType OSType; diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index c6ddd6bdad5e..f48429ee57b0 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "BPFRegisterInfo.h" #include "BPFSubtarget.h" #include "BPFTargetMachine.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -57,6 +58,11 @@ private: bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + // Node preprocessing cases + void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator I); + void PreprocessCopyToReg(SDNode *Node); + void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator I); + // Find constants from a constant structure typedef std::vector<unsigned char> val_vec_type; bool fillGenericConstant(const DataLayout &DL, const Constant *CV, @@ -69,9 +75,12 @@ private: val_vec_type &Vals, int Offset); bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset, uint64_t Size, unsigned char *ByteSeq); + bool checkLoadDef(unsigned DefReg, unsigned match_load_op); // Mapping from ConstantStruct global value to corresponding byte-list values std::map<const void *, val_vec_type> cs_vals_; + // Mapping from vreg to load memory opcode + std::map<unsigned, unsigned> load_to_vreg_; }; } // namespace @@ -203,89 +212,110 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node, + SelectionDAG::allnodes_iterator I) { + union { + uint8_t c[8]; + uint16_t s; + uint32_t i; + uint64_t d; + } new_val; // hold up the constant values replacing loads. + bool to_replace = false; + SDLoc DL(Node); + const LoadSDNode *LD = cast<LoadSDNode>(Node); + uint64_t size = LD->getMemOperand()->getSize(); + + if (!size || size > 8 || (size & (size - 1))) + return; + + SDNode *LDAddrNode = LD->getOperand(1).getNode(); + // Match LDAddr against either global_addr or (global_addr + offset) + unsigned opcode = LDAddrNode->getOpcode(); + if (opcode == ISD::ADD) { + SDValue OP1 = LDAddrNode->getOperand(0); + SDValue OP2 = LDAddrNode->getOperand(1); + + // We want to find the pattern global_addr + offset + SDNode *OP1N = OP1.getNode(); + if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || OP1N->getNumOperands() == 0) + return; + + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode()); + const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode()); + if (GADN && CDN) + to_replace = + getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); + } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && + LDAddrNode->getNumOperands() > 0) { + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + SDValue OP1 = LDAddrNode->getOperand(0); + if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(OP1.getNode())) + to_replace = getConstantFieldValue(GADN, 0, size, new_val.c); + } + + if (!to_replace) + return; + + // replacing the old with a new value + uint64_t val; + if (size == 1) + val = new_val.c[0]; + else if (size == 2) + val = new_val.s; + else if (size == 4) + val = new_val.i; + else { + val = new_val.d; + } + + DEBUG(dbgs() << "Replacing load of size " << size << " with constant " << val + << '\n'); + SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); + + // After replacement, the current node is dead, we need to + // go backward one step to make iterator still work + I--; + SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)}; + SDValue To[] = {NVal, NVal}; + CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); + I++; + // It is safe to delete node now + CurDAG->DeleteNode(Node); +} + void BPFDAGToDAGISel::PreprocessISelDAG() { - // Iterate through all nodes, only interested in loads from ConstantStruct - // ConstantArray should have converted by IR->DAG processing + // Iterate through all nodes, interested in the following cases: + // + // . loads from ConstantStruct or ConstantArray of constructs + // which can be turns into constant itself, with this we can + // avoid reading from read-only section at runtime. + // + // . reg truncating is often the result of 8/16/32bit->64bit or + // 8/16bit->32bit conversion. If the reg value is loaded with + // masked byte width, the AND operation can be removed since + // BPF LOAD already has zero extension. + // + // This also solved a correctness issue. + // In BPF socket-related program, e.g., __sk_buff->{data, data_end} + // are 32-bit registers, but later on, kernel verifier will rewrite + // it with 64-bit value. Therefore, truncating the value after the + // load will result in incorrect code. for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { SDNode *Node = &*I++; unsigned Opcode = Node->getOpcode(); - if (Opcode != ISD::LOAD) - continue; - - union { - uint8_t c[8]; - uint16_t s; - uint32_t i; - uint64_t d; - } new_val; // hold up the constant values replacing loads. - bool to_replace = false; - SDLoc DL(Node); - const LoadSDNode *LD = cast<LoadSDNode>(Node); - uint64_t size = LD->getMemOperand()->getSize(); - if (!size || size > 8 || (size & (size - 1))) - continue; - - SDNode *LDAddrNode = LD->getOperand(1).getNode(); - // Match LDAddr against either global_addr or (global_addr + offset) - unsigned opcode = LDAddrNode->getOpcode(); - if (opcode == ISD::ADD) { - SDValue OP1 = LDAddrNode->getOperand(0); - SDValue OP2 = LDAddrNode->getOperand(1); - - // We want to find the pattern global_addr + offset - SDNode *OP1N = OP1.getNode(); - if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || - OP1N->getNumOperands() == 0) - continue; - - DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); - - const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(OP1N->getOperand(0).getNode()); - const ConstantSDNode *CDN = dyn_cast<ConstantSDNode>(OP2.getNode()); - if (GADN && CDN) - to_replace = - getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); - } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && - LDAddrNode->getNumOperands() > 0) { - DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); - - SDValue OP1 = LDAddrNode->getOperand(0); - if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(OP1.getNode())) - to_replace = getConstantFieldValue(GADN, 0, size, new_val.c); - } - - if (!to_replace) - continue; - - // replacing the old with a new value - uint64_t val; - if (size == 1) - val = new_val.c[0]; - else if (size == 2) - val = new_val.s; - else if (size == 4) - val = new_val.i; - else { - val = new_val.d; - } - - DEBUG(dbgs() << "Replacing load of size " << size << " with constant " - << val << '\n'); - SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); - - // After replacement, the current node is dead, we need to - // go backward one step to make iterator still work - I--; - SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)}; - SDValue To[] = {NVal, NVal}; - CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); - I++; - // It is safe to delete node now - CurDAG->DeleteNode(Node); + if (Opcode == ISD::LOAD) + PreprocessLoad(Node, I); + else if (Opcode == ISD::CopyToReg) + PreprocessCopyToReg(Node); + else if (Opcode == ISD::AND) + PreprocessTrunc(Node, I); } } @@ -415,6 +445,134 @@ bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, return true; } +void BPFDAGToDAGISel::PreprocessCopyToReg(SDNode *Node) { + const RegisterSDNode *RegN = dyn_cast<RegisterSDNode>(Node->getOperand(1)); + if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) + return; + + const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node->getOperand(2)); + if (!LD) + return; + + // Assign a load value to a virtual register. record its load width + unsigned mem_load_op = 0; + switch (LD->getMemOperand()->getSize()) { + default: + return; + case 4: + mem_load_op = BPF::LDW; + break; + case 2: + mem_load_op = BPF::LDH; + break; + case 1: + mem_load_op = BPF::LDB; + break; + } + + DEBUG(dbgs() << "Find Load Value to VReg " + << TargetRegisterInfo::virtReg2Index(RegN->getReg()) << '\n'); + load_to_vreg_[RegN->getReg()] = mem_load_op; +} + +void BPFDAGToDAGISel::PreprocessTrunc(SDNode *Node, + SelectionDAG::allnodes_iterator I) { + ConstantSDNode *MaskN = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!MaskN) + return; + + unsigned match_load_op = 0; + switch (MaskN->getZExtValue()) { + default: + return; + case 0xFFFFFFFF: + match_load_op = BPF::LDW; + break; + case 0xFFFF: + match_load_op = BPF::LDH; + break; + case 0xFF: + match_load_op = BPF::LDB; + break; + } + + // The Reg operand should be a virtual register, which is defined + // outside the current basic block. DAG combiner has done a pretty + // good job in removing truncating inside a single basic block. + SDValue BaseV = Node->getOperand(0); + if (BaseV.getOpcode() != ISD::CopyFromReg) + return; + + const RegisterSDNode *RegN = + dyn_cast<RegisterSDNode>(BaseV.getNode()->getOperand(1)); + if (!RegN || !TargetRegisterInfo::isVirtualRegister(RegN->getReg())) + return; + unsigned AndOpReg = RegN->getReg(); + DEBUG(dbgs() << "Examine %vreg" << TargetRegisterInfo::virtReg2Index(AndOpReg) + << '\n'); + + // Examine the PHI insns in the MachineBasicBlock to found out the + // definitions of this virtual register. At this stage (DAG2DAG + // transformation), only PHI machine insns are available in the machine basic + // block. + MachineBasicBlock *MBB = FuncInfo->MBB; + MachineInstr *MII = nullptr; + for (auto &MI : *MBB) { + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand &MOP = MI.getOperand(i); + if (!MOP.isReg() || !MOP.isDef()) + continue; + unsigned Reg = MOP.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && Reg == AndOpReg) { + MII = &MI; + break; + } + } + } + + if (MII == nullptr) { + // No phi definition in this block. + if (!checkLoadDef(AndOpReg, match_load_op)) + return; + } else { + // The PHI node looks like: + // %vreg2<def> = PHI %vreg0, <BB#1>, %vreg1, <BB#3> + // Trace each incoming definition, e.g., (%vreg0, BB#1) and (%vreg1, BB#3) + // The AND operation can be removed if both %vreg0 in BB#1 and %vreg1 in + // BB#3 are defined with with a load matching the MaskN. + DEBUG(dbgs() << "Check PHI Insn: "; MII->dump(); dbgs() << '\n'); + unsigned PrevReg = -1; + for (unsigned i = 0; i < MII->getNumOperands(); ++i) { + const MachineOperand &MOP = MII->getOperand(i); + if (MOP.isReg()) { + if (MOP.isDef()) + continue; + PrevReg = MOP.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(PrevReg)) + return; + if (!checkLoadDef(PrevReg, match_load_op)) + return; + } + } + } + + DEBUG(dbgs() << "Remove the redundant AND operation in: "; Node->dump(); + dbgs() << '\n'); + + I--; + CurDAG->ReplaceAllUsesWith(SDValue(Node, 0), BaseV); + I++; + CurDAG->DeleteNode(Node); +} + +bool BPFDAGToDAGISel::checkLoadDef(unsigned DefReg, unsigned match_load_op) { + auto it = load_to_vreg_.find(DefReg); + if (it == load_to_vreg_.end()) + return false; // The definition of register is not exported yet. + + return it->second == match_load_op; +} + FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { return new BPFDAGToDAGISel(TM); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 2b0ceaa66258..97a53dcbaed7 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -178,8 +178,8 @@ static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), cl::init(false), cl::ZeroOrMore); -static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), - cl::Hidden, cl::desc("Use allocframe more conservatively")); +static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true), + cl::Hidden, cl::desc("Refrain from using FP whenever possible")); static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, cl::init(true), cl::desc("Optimize spill slots")); @@ -550,7 +550,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); - DebugLoc dl; unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment()); @@ -584,77 +583,56 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, MI->eraseFromParent(); } - if (!hasFP(MF)) - return; - - // Check for overflow. - // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? - const unsigned int ALLOCFRAME_MAX = 16384; + DebugLoc dl = MBB.findDebugLoc(InsertPt); - // Create a dummy memory operand to avoid allocframe from being treated as - // a volatile memory reference. - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, - 4, 4); - - if (NumBytes >= ALLOCFRAME_MAX) { - // Emit allocframe(#0). - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) - .addImm(0) - .addMemOperand(MMO); - - // Subtract offset from frame pointer. - // We use a caller-saved non-parameter register for that. - unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32), - CallerSavedReg).addImm(NumBytes); - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) + if (hasFP(MF)) { + insertAllocframe(MBB, InsertPt, NumBytes); + if (AlignStack) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) + .addReg(SP) + .addImm(-int64_t(MaxAlign)); + } + // If the stack-checking is enabled, and we spilled the callee-saved + // registers inline (i.e. did not use a spill function), then call + // the stack checker directly. + if (EnableStackOVFSanitizer && !PrologueStubs) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) + .addExternalSymbol("__runtime_stack_check"); + } else if (NumBytes > 0) { + assert(alignTo(NumBytes, 8) == NumBytes); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) .addReg(SP) - .addReg(CallerSavedReg); - } else { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) - .addImm(NumBytes) - .addMemOperand(MMO); + .addImm(-int(NumBytes)); } - - if (AlignStack) { - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) - .addReg(SP) - .addImm(-int64_t(MaxAlign)); - } - - // If the stack-checking is enabled, and we spilled the callee-saved - // registers inline (i.e. did not use a spill function), then call - // the stack checker directly. - if (EnableStackOVFSanitizer && !PrologueStubs) - BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk)) - .addExternalSymbol("__runtime_stack_check"); } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); - if (!hasFP(MF)) - return; - auto &HST = MF.getSubtarget<HexagonSubtarget>(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); unsigned SP = HRI.getStackRegister(); + MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); + DebugLoc dl = MBB.findDebugLoc(InsertPt); + + if (!hasFP(MF)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + if (unsigned NumBytes = MFI.getStackSize()) { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) + .addReg(SP) + .addImm(NumBytes); + } + return; + } + MachineInstr *RetI = getReturn(MBB); unsigned RetOpc = RetI ? RetI->getOpcode() : 0; - MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); - DebugLoc DL; - if (InsertPt != MBB.end()) - DL = InsertPt->getDebugLoc(); - else if (!MBB.empty()) - DL = std::prev(MBB.end())->getDebugLoc(); - // Handle EH_RETURN. if (RetOpc == Hexagon::EH_RETURN_JMPR) { - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP) .addReg(SP) .addReg(Hexagon::R28); return; @@ -699,16 +677,52 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { // otherwise just add deallocframe. The function could be returning via a // tail call. if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) { - BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe)); return; } unsigned NewOpc = Hexagon::L4_return; - MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); + MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc)); // Transfer the function live-out registers. NewI->copyImplicitOps(MF, *RetI); MBB.erase(RetI); } +void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const { + MachineFunction &MF = *MBB.getParent(); + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // Check for overflow. + // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? + const unsigned int ALLOCFRAME_MAX = 16384; + + // Create a dummy memory operand to avoid allocframe from being treated as + // a volatile memory reference. + auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0), + MachineMemOperand::MOStore, 4, 4); + + DebugLoc dl = MBB.findDebugLoc(InsertPt); + + if (NumBytes >= ALLOCFRAME_MAX) { + // Emit allocframe(#0). + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(0) + .addMemOperand(MMO); + + // Subtract the size from the stack pointer. + unsigned SP = HRI.getStackRegister(); + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) + .addReg(SP) + .addImm(-int(NumBytes)); + } else { + BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) + .addImm(NumBytes) + .addMemOperand(MMO); + } +} + void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF, MachineBasicBlock &SaveB) const { SetVector<unsigned> Worklist; @@ -928,12 +942,11 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, } bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + return false; + auto &MFI = MF.getFrameInfo(); auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); - - bool HasFixed = MFI.getNumFixedObjects(); - bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI) - .getLocalFrameObjectCount(); bool HasExtraAlign = HRI.needsStackRealignment(MF); bool HasAlloca = MFI.hasVarSizedObjects(); @@ -947,18 +960,35 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { // By default we want to use SP (since it's always there). FP requires // some setup (i.e. ALLOCFRAME). - // Fixed and preallocated objects need FP if the distance from them to - // the SP is unknown (as is with alloca or aligna). - if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + // Both, alloca and stack alignment modify the stack pointer by an + // undetermined value, so we need to save it at the entry to the function + // (i.e. use allocframe). + if (HasAlloca || HasExtraAlign) return true; if (MFI.getStackSize() > 0) { - if (EnableStackOVFSanitizer || UseAllocframe) + // If FP-elimination is disabled, we have to use FP at this point. + const TargetMachine &TM = MF.getTarget(); + if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer) + return true; + if (EnableStackOVFSanitizer) return true; } - if (MFI.hasCalls() || - MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR()) + const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); + if (MFI.hasCalls() || HMFI.hasClobberLR()) + return true; + + // Frame pointer elimination is a possiblility at this point, but + // to know if FP is necessary we need to know if spill/restore + // functions will be used (they require FP to be valid). + // This means that hasFP shouldn't really be called before CSI is + // calculated, and some measures are taken to make sure of that + // (e.g. default implementations of virtual functions that call it + // are overridden apropriately). + assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI"); + const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); + if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI)) return true; return false; @@ -1051,9 +1081,10 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, bool HasExtraAlign = HRI.needsStackRealignment(MF); bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; - unsigned FrameSize = MFI.getStackSize(); - unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); + unsigned FrameSize = MFI.getStackSize(); + unsigned SP = HRI.getStackRegister(); + unsigned FP = HRI.getFrameRegister(); unsigned AP = HMFI.getStackAlignBasePhysReg(); // It may happen that AP will be absent even HasAlloca && HasExtraAlign // is true. HasExtraAlign may be set because of vector spills, without @@ -1135,7 +1166,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, // there will be no SP -= FrameSize), so the frame size should not be // added to the calculated offset. int RealOffset = Offset; - if (!UseFP && !UseAP && HasFP) + if (!UseFP && !UseAP) RealOffset = FrameSize+Offset; return RealOffset; } @@ -2402,7 +2433,7 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, /// be generated via inline code. If this function returns "true", inline /// code will be generated. If this function returns "false", additional /// checks are performed, which may still lead to the inline code. -bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, +bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const { if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) return true; @@ -2432,7 +2463,7 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, return false; } -bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, +bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) return false; @@ -2445,7 +2476,7 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, return Threshold < NumCSI; } -bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, +bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const { if (shouldInlineCSR(MF, CSI)) return false; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 529a61d4a5b5..f4d4e1b61a26 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -48,6 +48,15 @@ public: return true; } + bool hasReservedCallFrame(const MachineFunction &MF) const override { + // We always reserve call frame as a part of the initial stack allocation. + return true; + } + bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override { + // Override this function to avoid calling hasFP before CSI is set + // (the default implementation calls hasFP). + return true; + } MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; @@ -94,6 +103,8 @@ private: unsigned SP, unsigned CF) const; void insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const; void insertEpilogueInBlock(MachineBasicBlock &MBB) const; + void insertAllocframe(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const; bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI, bool &PrologueStubs) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, @@ -148,9 +159,9 @@ private: void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI, bool IsDef, bool IsKill) const; - bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const; - bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; - bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; + bool shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const; + bool useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const; + bool useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const; bool mayOverflowFrameOffset(MachineFunction &MF) const; }; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index afed894cfb9a..2daacf795555 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1002,51 +1002,46 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); MachineFunction &MF = DAG.getMachineFunction(); - auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); - switch (Node->getOpcode()) { - case ISD::INLINEASM: { - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) - --NumOps; // Ignore the flag operand. - - for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { - if (FuncInfo.hasClobberLR()) - break; - unsigned Flags = - cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - ++i; // Skip the ID value. - - switch (InlineAsm::getKind(Flags)) { - default: llvm_unreachable("Bad flags!"); - case InlineAsm::Kind_RegDef: - case InlineAsm::Kind_RegUse: - case InlineAsm::Kind_Imm: - case InlineAsm::Kind_Clobber: - case InlineAsm::Kind_Mem: { - for (; NumVals; --NumVals, ++i) {} - break; - } - case InlineAsm::Kind_RegDefEarlyClobber: { - for (; NumVals; --NumVals, ++i) { - unsigned Reg = - cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - - // Check it to be lr - const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo(); - if (Reg == QRI->getRARegister()) { - FuncInfo.setHasClobberLR(true); - break; - } - } - break; - } + auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); + unsigned LR = HRI.getRARegister(); + + if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR()) + return Op; + + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the flag operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); + if (Reg != LR) + continue; + HMFI.setHasClobberLR(true); + return Op; } + break; } } - } // Node->getOpcode + } + return Op; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index fec2dc5ce306..1eac2d3dd8e2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1253,10 +1253,16 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + unsigned PReg = Op1.getReg(); + assert(Op1.getSubReg() == 0); + unsigned PState = getRegState(Op1); + if (Op0.getReg() != Op2.getReg()) { + unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill + : PState; auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov)) .add(Op0) - .add(Op1) + .addReg(PReg, S) .add(Op2); if (IsDestLive) T.addReg(Op0.getReg(), RegState::Implicit); @@ -1265,7 +1271,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if (Op0.getReg() != Op3.getReg()) { auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov)) .add(Op0) - .add(Op1) + .addReg(PReg, PState) .add(Op3); if (IsDestLive) T.addReg(Op0.getReg(), RegState::Implicit); @@ -1282,12 +1288,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); + unsigned PReg = Op1.getReg(); + assert(Op1.getSubReg() == 0); + unsigned PState = getRegState(Op1); if (Op0.getReg() != Op2.getReg()) { + unsigned S = Op0.getReg() != Op3.getReg() ? PState & ~RegState::Kill + : PState; unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo); unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine)) .add(Op0) + .addReg(PReg, S) .add(Op1) .addReg(SrcHi) .addReg(SrcLo); @@ -1300,7 +1312,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi); auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine)) .add(Op0) - .add(Op1) + .addReg(PReg, PState) .addReg(SrcHi) .addReg(SrcLo); if (IsDestLive) diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index de6b203015d8..e93f075f4ccd 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -69,9 +69,7 @@ namespace { public: static char ID; - HexagonNewValueJump() : MachineFunctionPass(ID) { - initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); - } + HexagonNewValueJump() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); @@ -445,8 +443,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { unsigned predReg = 0; // predicate reg of the jump. unsigned cmpReg1 = 0; int cmpOp2 = 0; - bool MO1IsKill = false; - bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; @@ -548,14 +544,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // We need cmpReg1 and cmpOp2(imm or reg) while building // new value jump instruction. cmpReg1 = MI.getOperand(1).getReg(); - if (MI.getOperand(1).isKill()) - MO1IsKill = true; - if (isSecondOpReg) { + if (isSecondOpReg) cmpOp2 = MI.getOperand(2).getReg(); - if (MI.getOperand(2).isKill()) - MO2IsKill = true; - } else + else cmpOp2 = MI.getOperand(2).getImm(); continue; } @@ -605,11 +597,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) && (feederReg == (unsigned) cmpOp2)) { unsigned tmp = cmpReg1; - bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; - MO1IsKill = MO2IsKill; cmpOp2 = tmp; - MO2IsKill = tmpIsKill; } // Now we have swapped the operands, all we need to check is, @@ -623,31 +612,33 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // make sure we are respecting the kill values of // the operands of the feeder. - bool updatedIsKill = false; - for (unsigned i = 0; i < MI.getNumOperands(); i++) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isUse()) { - unsigned feederReg = MO.getReg(); - for (MachineBasicBlock::iterator localII = feederPos, - end = cmpInstr->getIterator(); localII != end; localII++) { - MachineInstr &localMI = *localII; - for (unsigned j = 0; j < localMI.getNumOperands(); j++) { - MachineOperand &localMO = localMI.getOperand(j); - if (localMO.isReg() && localMO.isUse() && - localMO.isKill() && feederReg == localMO.getReg()) { - // We found that there is kill of a use register - // Set up a kill flag on the register - localMO.setIsKill(false); - MO.setIsKill(); - updatedIsKill = true; - break; - } + auto TransferKills = [jmpPos,cmpPos] (MachineInstr &MI) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned UseR = MO.getReg(); + for (auto I = std::next(MI.getIterator()); I != jmpPos; ++I) { + if (I == cmpPos) + continue; + for (MachineOperand &Op : I->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.isKill()) + continue; + if (Op.getReg() != UseR) + continue; + // We found that there is kill of a use register + // Set up a kill flag on the register + Op.setIsKill(false); + MO.setIsKill(true); + return; } - if (updatedIsKill) break; } } - if (updatedIsKill) break; - } + }; + + TransferKills(*feederPos); + TransferKills(*cmpPos); + bool MO1IsKill = cmpPos->killsRegister(cmpReg1, QRI); + bool MO2IsKill = isSecondOpReg && cmpPos->killsRegister(cmpOp2, QRI); MBB->splice(jmpPos, MI.getParent(), MI); MBB->splice(jmpPos, MI.getParent(), cmpInstr); diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 27b40f134b1f..a331c978f59d 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -535,9 +535,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { !MI->getOperand(1).isGlobal()) continue; - DEBUG(dbgs() << "[Analyzing A2_tfrsi]: " << *MI << "\n"); - DEBUG(dbgs() << "\t[InstrNode]: " << Print<NodeAddr<InstrNode *>>(IA, *DFG) - << "\n"); + DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: " + << *MI << "\n\t[InstrNode]: " + << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n'); NodeList UNodeList; getAllRealUses(SA, UNodeList); @@ -605,7 +605,9 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { const TargetOperandInfo TOI(*HII); DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI); - G.build(); + // Need to keep dead phis because we can propagate uses of registers into + // nodes dominated by those would-be phis. + G.build(BuildOptions::KeepDeadPhis); DFG = &G; Liveness L(MRI, *DFG); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 031a1bdefafb..76d9b31b005f 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -113,6 +113,7 @@ namespace llvm { void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonNewValueJumpPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -158,6 +159,7 @@ extern "C" void LLVMInitializeHexagonTarget() { initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonGenMuxPass(PR); initializeHexagonOptAddrModePass(PR); + initializeHexagonNewValueJumpPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 4dacb1501392..34df2ebcc520 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -49,6 +49,10 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement", cl::Hidden, cl::init(false), cl::desc("Trace global value placement")); +static cl::opt<bool> + EmitJtInText("hexagon-emit-jt-text", cl::Hidden, cl::init(false), + cl::desc("Emit hexagon jump tables in function section")); + // TraceGVPlacement controls messages for all builds. For builds with assertions // (debug or release), messages are also controlled by the usual debug flags // (e.g. -debug and -debug-only=globallayout) @@ -256,6 +260,11 @@ unsigned HexagonTargetObjectFile::getSmallDataSize() const { return SmallDataThreshold; } +bool HexagonTargetObjectFile::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + return EmitJtInText; +} + /// Descends any type down to "elementary" components, /// discovering the smallest addressable one. /// If zero is returned, declaration will not be modified. diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h index 58dff2b95e19..373d850b53be 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.h +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h @@ -33,6 +33,9 @@ namespace llvm { unsigned getSmallDataSize() const; + bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, + const Function &F) const override; + private: MCSectionELF *SmallDataSection; MCSectionELF *SmallBSSSection; diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index d578bfab3658..aac810e29fe9 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -21,6 +21,10 @@ using namespace llvm; #define DEBUG_TYPE "hexagontti" +static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", + cl::init(true), cl::Hidden, + cl::desc("Control lookup table emission on Hexagon target")); + TargetTransformInfo::PopcntSupportKind HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { // Return Fast Hardware support as every input < 64 bits will be promoted @@ -29,7 +33,7 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { } // The Hexagon target can unroll loops with run-time trip counts. -void HexagonTTIImpl::getUnrollingPreferences(Loop *L, +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Runtime = UP.Partial = true; } @@ -46,8 +50,9 @@ unsigned HexagonTTIImpl::getCacheLineSize() const { return getST()->getL1CacheLineSize(); } -int HexagonTTIImpl::getUserCost(const User *U) { - auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool { +int HexagonTTIImpl::getUserCost(const User *U, + ArrayRef<const Value *> Operands) { + auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); @@ -67,5 +72,9 @@ int HexagonTTIImpl::getUserCost(const User *U) { if (const CastInst *CI = dyn_cast<const CastInst>(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U); + return BaseT::getUserCost(U, Operands); +} + +bool HexagonTTIImpl::shouldBuildLookupTables() const { + return EmitLookupTables; } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index 8414bfc4e197..ab5a6e07d873 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -46,7 +46,8 @@ public: TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; // The Hexagon target can unroll loops with run-time trip counts. - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); // L1 cache prefetch. unsigned getPrefetchDistance() const; @@ -61,7 +62,10 @@ public: /// @} - int getUserCost(const User *U); + int getUserCost(const User *U, ArrayRef<const Value *> Operands); + + // Hexagon specific decision to generate a lookup table. + bool shouldBuildLookupTables() const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 093ce80bc2e3..34d0b55aa22a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -199,11 +199,8 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - /// processFixupValue - Target hook to adjust the literal value of a fixup - /// if necessary. IsResolved signals whether the caller believes a relocation - /// is needed; the target can modify the value. The default does nothing. - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { MCFixupKind Kind = Fixup.getKind(); switch((unsigned)Kind) { @@ -299,8 +296,7 @@ public: case fixup_Hexagon_LD_PLT_B22_PCREL_X: case fixup_Hexagon_LD_PLT_B32_PCREL_X: // These relocations should always have a relocation recorded - IsResolved = false; - return; + return true; case fixup_Hexagon_B22_PCREL: //IsResolved = false; @@ -317,7 +313,7 @@ public: case fixup_Hexagon_B7_PCREL: case fixup_Hexagon_B7_PCREL_X: if (DisableFixup) - IsResolved = false; + return true; break; case FK_Data_1: @@ -326,8 +322,9 @@ public: case FK_PCRel_4: case fixup_Hexagon_32: // Leave these relocations alone as they are used for EH. - return; + return false; } + return false; } /// getFixupKindNumBytes - The number of bytes the fixup may change. diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 9d5c179a0fd9..69b1ba1528d0 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -2789,6 +2789,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { + // FIXME: These expansions do not respect -mxgot. MipsTargetStreamer &TOut = getTargetStreamer(); bool UseSrcReg = SrcReg != Mips::NoRegister; warnIfNoMacro(IDLoc); @@ -2808,8 +2809,12 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, // symbol in the final relocation is external and not modified with a // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT16. if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && - Res.getConstant() == 0 && !Res.getSymA()->getSymbol().isInSection() && - !Res.getSymA()->getSymbol().isTemporary()) { + Res.getConstant() == 0 && + !(Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL))) { const MCExpr *CallExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(), @@ -2865,6 +2870,85 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return false; } + if (inPicMode() && ABI.ArePtrs64bit()) { + MCValue Res; + if (!SymExpr->evaluateAsRelocatable(Res, nullptr, nullptr)) { + Error(IDLoc, "expected relocatable expression"); + return true; + } + if (Res.getSymB() != nullptr) { + Error(IDLoc, "expected relocatable expression with only one symbol"); + return true; + } + + // The case where the result register is $25 is somewhat special. If the + // symbol in the final relocation is external and not modified with a + // constant then we must use R_MIPS_CALL16 instead of R_MIPS_GOT_DISP. + if ((DstReg == Mips::T9 || DstReg == Mips::T9_64) && !UseSrcReg && + Res.getConstant() == 0 && + !(Res.getSymA()->getSymbol().isInSection() || + Res.getSymA()->getSymbol().isTemporary() || + (Res.getSymA()->getSymbol().isELF() && + cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() == + ELF::STB_LOCAL))) { + const MCExpr *CallExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); + TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(), + MCOperand::createExpr(CallExpr), IDLoc, STI); + return false; + } + + // The remaining cases are: + // Small offset: ld $tmp, %got_disp(symbol)($gp) + // >daddiu $tmp, $tmp, offset + // >daddu $rd, $tmp, $rs + // The daddiu's marked with a '>' may be omitted if they are redundant. If + // this happens then the last instruction must use $rd as the result + // register. + const MipsMCExpr *GotExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, + Res.getSymA(), + getContext()); + const MCExpr *LoExpr = nullptr; + if (Res.getConstant() != 0) { + // Symbols fully resolve with just the %got_disp(symbol) but we + // must still account for any offset to the symbol for + // expressions like symbol+8. + LoExpr = MCConstantExpr::create(Res.getConstant(), getContext()); + + // FIXME: Offsets greater than 16 bits are not yet implemented. + // FIXME: The correct range is a 32-bit sign-extended number. + if (Res.getConstant() < -0x8000 || Res.getConstant() > 0x7fff) { + Error(IDLoc, "macro instruction uses large offset, which is not " + "currently supported"); + return true; + } + } + + unsigned TmpReg = DstReg; + if (UseSrcReg && + getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, + SrcReg)) { + // If $rs is the same as $rd, we need to use AT. + // If it is not available we exit. + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + + TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(), + MCOperand::createExpr(GotExpr), IDLoc, STI); + + if (LoExpr) + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), + IDLoc, STI); + + if (UseSrcReg) + TOut.emitRRR(Mips::DADDu, DstReg, TmpReg, SrcReg, IDLoc, STI); + + return false; + } + const MipsMCExpr *HiExpr = MipsMCExpr::create(MipsMCExpr::MEK_HI, SymExpr, getContext()); const MipsMCExpr *LoExpr = diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td index 6b7f39e9dd79..38b09d105ddd 100644 --- a/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td @@ -548,3 +548,15 @@ def : MipsInstAlias<"dnegu $rt, $rs", def : MipsInstAlias<"dnegu $rt", (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsll $rd, $rt, $rs", + (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, + GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsrl $rd, $rt, $rs", + (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rt, + GPR32Opnd:$rs), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsrl $rd, $rt", + (DSRLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, + GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; +def : MipsInstAlias<"dsll $rd, $rt", + (DSLLV_MM64R6 GPR64Opnd:$rd, GPR64Opnd:$rd, + GPR32Opnd:$rt), 0>, ISA_MICROMIPS64R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 99025fe1341d..3dba7ce30cad 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -748,9 +748,6 @@ let AdditionalPredicates = [NotInMicroMips] in { defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>, GPR_64; } -def : MipsInstAlias<"dsll $rd, $rt, $rs", - (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, - ISA_MIPS3; let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"dneg $rt, $rs", (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>, @@ -793,9 +790,18 @@ def : MipsInstAlias<"dsra $rd, $rt, $rs", (DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"dsll $rd, $rt, $rs", + (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, + ISA_MIPS3; def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; + def : MipsInstAlias<"dsrl $rd, $rt", + (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, + ISA_MIPS3; + def : MipsInstAlias<"dsll $rd, $rt", + (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rd, GPR32Opnd:$rt), 0>, + ISA_MIPS3; // Two operand (implicit 0 selector) versions: def : MipsInstAlias<"dmtc0 $rt, $rd", diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index 5d82571ff94f..4a34e3101cb8 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -564,7 +564,7 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch, // For given opcode returns opcode of corresponding instruction with short // delay slot. -// For the pseudo TAILCALL*_MM instrunctions return the short delay slot +// For the pseudo TAILCALL*_MM instructions return the short delay slot // form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range // that is too short to make use of for tail calls. static int getEquivalentCallShort(int Opcode) { diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 02102d6b22f4..a6ec9fb2e598 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -364,18 +364,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); - if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) { - setOperationAction(ISD::ADDC, MVT::i32, Expand); - setOperationAction(ISD::ADDE, MVT::i32, Expand); - } - - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i32, Expand); - setOperationAction(ISD::SUBE, MVT::i32, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); - // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -481,7 +469,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); @@ -936,130 +923,14 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, } } -static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, - const MipsSubtarget &Subtarget) { - // ROOTNode must have a multiplication as an operand for the match to be - // successful. - if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && - ROOTNode->getOperand(1).getOpcode() != ISD::MUL) - return SDValue(); - - // We don't handle vector types here. - if (ROOTNode->getValueType(0).isVector()) - return SDValue(); - - // For MIPS64, madd / msub instructions are inefficent to use with 64 bit - // arithmetic. E.g. - // (add (mul a b) c) => - // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in - // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) - // or - // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) - // - // The overhead of setting up the Hi/Lo registers and reassembling the - // result makes this a dubious optimzation for MIPS64. The core of the - // problem is that Hi/Lo contain the upper and lower 32 bits of the - // operand and result. - // - // It requires a chain of 4 add/mul for MIPS64R2 to get better code - // density than doing it naively, 5 for MIPS64. Additionally, using - // madd/msub on MIPS64 requires the operands actually be 32 bit sign - // extended operands, not true 64 bit values. - // - // FIXME: For the moment, disable this completely for MIPS64. - if (Subtarget.hasMips64()) - return SDValue(); - - SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL - ? ROOTNode->getOperand(0) - : ROOTNode->getOperand(1); - - SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL - ? ROOTNode->getOperand(1) - : ROOTNode->getOperand(0); - - // Transform this to a MADD only if the user of this node is the add. - // If there are other users of the mul, this function returns here. - if (!Mult.hasOneUse()) - return SDValue(); - - // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 - // must be in canonical form, i.e. sign extended. For MIPS32, the operands - // of the multiply must have 32 or more sign bits, otherwise we cannot - // perform this optimization. We have to check this here as we're performing - // this optimization pre-legalization. - SDValue MultLHS = Mult->getOperand(0); - SDValue MultRHS = Mult->getOperand(1); - unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS); - unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS); - - if (LHSSB < 32 || RHSSB < 32) - return SDValue(); - - APInt HighMask = - APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32); - bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) && - CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) && - CurDAG.MaskedValueIsZero(AddOperand, HighMask); - - // Initialize accumulator. - SDLoc DL(ROOTNode); - SDValue TopHalf; - SDValue BottomHalf; - BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, - CurDAG.getIntPtrConstant(0, DL)); - - TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, - CurDAG.getIntPtrConstant(1, DL)); - SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - BottomHalf, - TopHalf); - - // Create MipsMAdd(u) / MipsMSub(u) node. - bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; - unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) - : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub); - SDValue MAddOps[3] = { - CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), - CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; - EVT VTs[2] = {MVT::i32, MVT::i32}; - SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); - - SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - SDValue Combined = - CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); - return Combined; -} - -static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) - if (DCI.isBeforeLegalizeOps()) { - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) - return performMADD_MSUBCombine(N, DAG, Subtarget); - - return SDValue(); - } - - return SDValue(); -} - static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0 (mul v1, v2)) => (madd v1, v2, v0) - if (DCI.isBeforeLegalizeOps()) { - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) - return performMADD_MSUBCombine(N, DAG, Subtarget); + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + if (DCI.isBeforeLegalizeOps()) return SDValue(); - } - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -1187,8 +1058,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); - case ISD::SUB: - return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 4be26dd25dc0..49ae6dd4cd39 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -245,64 +245,46 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { - SDValue InFlag = Node->getOperand(2); - unsigned Opc = InFlag.getOpcode(); +void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, const SDLoc &DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; + if (Subtarget->isGP64bit()) { + SLTuOp = Mips::SLTu64; + ADDuOp = Mips::DADDu; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - // In the base case, we can rely on the carry bit from the addsc - // instruction. - if (Opc == ISD::ADDC) { - SDValue Ops[3] = {LHS, RHS, InFlag}; - CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); - return; + SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); + + if (Subtarget->isGP64bit()) { + // On 64-bit targets, sltu produces an i64 but our backend currently says + // that SLTu64 produces an i32. We need to fix this in the long run but for + // now, just make the DAG type-correct by asserting the upper bits are zero. + Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, + CurDAG->getTargetConstant(0, DL, VT), + SDValue(Carry, 0), + CurDAG->getTargetConstant(Mips::sub_32, DL, + VT)); } - assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); - - // The more complex case is when there is a chain of ISD::ADDE nodes like: - // (adde (adde (adde (addc a b) c) d) e). - // - // The addwc instruction does not write to the carry bit, instead it writes - // to bit 20 of the dsp control register. To match this series of nodes, each - // intermediate adde node must be expanded to write the carry bit before the - // addition. - - // Start by reading the overflow field for addsc and moving the value to the - // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP - // corresponds to reading/writing the entire control register to/from a GPR. - - SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); - - SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); - - SDNode *DSPCtrlField = - CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); - - SDNode *Carry = CurDAG->getMachineNode( - Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); + // Generate a second addition only if we know that RHS is not a + // constant-zero node. + SDNode *AddCarry = Carry; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + if (!C || C->getZExtValue()) + AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); - SDValue Ops[4] = {SDValue(DSPCtrlField, 0), - CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, - SDValue(Carry, 0)}; - SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); - - // My reading of the the MIPS DSP 3.01 specification isn't as clear as I - // would like about whether bit 20 always gets overwritten by addwc. - // Hence take an extremely conservative view and presume it's sticky. We - // therefore need to clear it. - - SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); - - SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; - SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); - - SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, - SDValue(DSPCtrlFinal, 0), CstOne); - - SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; - CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); + CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); } /// Match frameindex @@ -783,8 +765,19 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { switch(Opcode) { default: break; + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu; + selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); + return true; + } + case ISD::ADDE: { - selectAddE(Node, DL); + if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + break; + SDValue InFlag = Node->getOperand(2); + unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu; + selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); return true; } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 6f38289c5a45..f89a350cab04 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,7 +41,8 @@ private: const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddE(SDNode *Node, const SDLoc &DL) const; + void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + const SDLoc &DL, SDNode *Node) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index b57bceb3c837..06a97b9d123e 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,6 +179,8 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setTargetDAGCombine(ISD::ADDE); + setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -419,6 +421,163 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } +// selectMADD - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc multLo, Lo0), (adde multHi, Hi0), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { + // ADDENode's second operand must be a flag output of an ADDC node in order + // for the matching to be successful. + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); + + if (ADDCNode->getOpcode() != ISD::ADDC) + return false; + + SDValue MultHi = ADDENode->getOperand(0); + SDValue MultLo = ADDCNode->getOperand(0); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MADD only if ADDENode and ADDCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than ADDENode or ADDCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MADD instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + SDLoc DL(ADDENode); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + ADDCNode->getOperand(1), + ADDENode->getOperand(1)); + + // create MipsMAdd(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; + + SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of adde and addc here + if (!SDValue(ADDCNode, 0).use_empty()) { + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); + } + if (!SDValue(ADDENode, 0).use_empty()) { + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); + } + + return true; +} + +// selectMSUB - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc Lo0, multLo), (sube Hi0, multHi), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { + // SUBENode's second operand must be a flag output of an SUBC node in order + // for the matching to be successful. + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); + + if (SUBCNode->getOpcode() != ISD::SUBC) + return false; + + SDValue MultHi = SUBENode->getOperand(1); + SDValue MultLo = SUBCNode->getOperand(1); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MSUB only if SUBENode and SUBCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than SUBENode or SUBCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MSUB instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + SDLoc DL(SUBENode); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + SUBCNode->getOperand(0), + SUBENode->getOperand(0)); + + // create MipsSub(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; + + SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of sube and subc here + if (!SDValue(SUBCNode, 0).use_empty()) { + SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); + } + if (!SDValue(SUBENode, 0).use_empty()) { + SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); + } + + return true; +} + +static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -661,6 +820,19 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && + selectMSUB(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -938,12 +1110,16 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { + case ISD::ADDE: + return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; + case ISD::SUBE: + return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index dd7707084948..a64d95512a4a 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -141,9 +141,9 @@ int NVPTXTTIImpl::getArithmeticInstrCost( } } -void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, +void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); // Enable partial unrolling and runtime unrolling, but reduce the // threshold. This partially unrolls small loops which are often diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 03075b550429..f987892ba675 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -61,7 +61,8 @@ public: TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef<const Value *> Args = ArrayRef<const Value *>()); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); }; } // end namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 6d7eb786a683..7393f3d7a08a 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -131,10 +131,11 @@ public: } } - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { switch ((PPC::Fixups)Fixup.getKind()) { - default: break; + default: + return false; case PPC::fixup_ppc_br24: case PPC::fixup_ppc_br24abs: // If the target symbol has a local entry point we must not attempt @@ -147,10 +148,10 @@ public: // and thus the shift to pack it. unsigned Other = S->getOther() << 2; if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) - IsResolved = false; + return true; } } - break; + return false; } } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index ae43e59d3cb1..dce443997ea5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -17,35 +17,31 @@ namespace llvm { namespace PPC { enum Fixups { - // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b' - // and 'bl'. + // 24-bit PC relative relocation for direct branches like 'b' and 'bl'. fixup_ppc_br24 = FirstTargetFixupKind, - - /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional - /// branches. + + /// 14-bit PC relative relocation for conditional branches. fixup_ppc_brcond14, - - /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches - /// like 'ba' and 'bla'. + + /// 24-bit absolute relocation for direct branches like 'ba' and 'bla'. fixup_ppc_br24abs, - /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional - /// branches. + /// 14-bit absolute relocation for conditional branches. fixup_ppc_brcond14abs, - /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo) - /// or ha16(_foo) for instrs like 'li' or 'addis'. + /// A 16-bit fixup corresponding to lo16(_foo) or ha16(_foo) for instrs like + /// 'li' or 'addis'. fixup_ppc_half16, - - /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with - /// implied 2 zero bits for instrs like 'std'. + + /// A 14-bit fixup corresponding to lo16(_foo) with implied 2 zero bits for + /// instrs like 'std'. fixup_ppc_half16ds, - /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call - /// to __tls_get_addr for the TLS general and local dynamic models, - /// or inserts the thread-pointer register number. + /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the + /// TLS general and local dynamic models, or inserts the thread-pointer + /// register number. fixup_ppc_nofixup, - + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 6d591ca964a6..d5506277ca88 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -219,11 +219,11 @@ bool PPCMachObjectWriter::recordScatteredRelocation( const MCSymbol *SB = &B->getSymbol(); if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + report_fatal_error("symbol '" + SB->getName() + "' can not be undefined in a subtraction expression"); // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h - Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); + Value2 = Writer->getSymbolAddress(*SB, Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } // FIXME: does FixedValue get used?? diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 07c9c1f9f84c..ad92ac8ce120 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_POWERPC_PPC_H #define LLVM_LIB_TARGET_POWERPC_PPC_H +#include "llvm/Support/CodeGen.h" #include "MCTargetDesc/PPCMCTargetDesc.h" // GCC #defines PPC on Linux but we use it as our namespace name @@ -28,7 +29,7 @@ namespace llvm { class AsmPrinter; class MCInst; - FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM); + FunctionPass *createPPCCTRLoops(); #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif @@ -41,7 +42,7 @@ namespace llvm { FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCQPXLoadSplatPass(); - FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL); FunctionPass *createPPCTLSDynamicCallPass(); FunctionPass *createPPCBoolRetToIntPass(); FunctionPass *createPPCExpandISELPass(); @@ -51,6 +52,7 @@ namespace llvm { void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCBoolRetToIntPass(PassRegistry&); void initializePPCExpandISELPass(PassRegistry &); + void initializePPCTLSDynamicCallPass(PassRegistry &); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 24bc027f8106..094d3e6a61b5 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -24,12 +24,14 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -81,10 +83,7 @@ namespace { public: static char ID; - PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { - initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); - } - PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { + PPCCTRLoops() : FunctionPass(ID) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } @@ -99,16 +98,18 @@ namespace { } private: - bool mightUseCTR(const Triple &TT, BasicBlock *BB); + bool mightUseCTR(BasicBlock *BB); bool convertToCTRLoop(Loop *L); private: - PPCTargetMachine *TM; + const PPCTargetMachine *TM; + const PPCSubtarget *STI; + const PPCTargetLowering *TLI; + const DataLayout *DL; + const TargetLibraryInfo *LibInfo; LoopInfo *LI; ScalarEvolution *SE; - const DataLayout *DL; DominatorTree *DT; - const TargetLibraryInfo *LibInfo; bool PreserveLCSSA; }; @@ -149,9 +150,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) -FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) { - return new PPCCTRLoops(TM); -} +FunctionPass *llvm::createPPCCTRLoops() { return new PPCCTRLoops(); } #ifndef NDEBUG INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify", @@ -169,6 +168,14 @@ bool PPCCTRLoops::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + TM = &TPC->getTM<PPCTargetMachine>(); + STI = TM->getSubtargetImpl(F); + TLI = STI->getTargetLowering(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -198,8 +205,7 @@ static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) { // Determining the address of a TLS variable results in a function call in // certain TLS models. -static bool memAddrUsesCTR(const PPCTargetMachine *TM, - const Value *MemAddr) { +static bool memAddrUsesCTR(const PPCTargetMachine &TM, const Value *MemAddr) { const auto *GV = dyn_cast<GlobalValue>(MemAddr); if (!GV) { // Recurse to check for constants that refer to TLS global variables. @@ -213,35 +219,35 @@ static bool memAddrUsesCTR(const PPCTargetMachine *TM, if (!GV->isThreadLocal()) return false; - if (!TM) - return true; - TLSModel::Model Model = TM->getTLSModel(GV); + TLSModel::Model Model = TM.getTLSModel(GV); return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; } -bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { +// Loop through the inline asm constraints and look for something that clobbers +// ctr. +static bool asmClobbersCTR(InlineAsm *IA) { + InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); + for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { + InlineAsm::ConstraintInfo &C = CIV[i]; + if (C.Type != InlineAsm::isInput) + for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) + if (StringRef(C.Codes[j]).equals_lower("{ctr}")) + return true; + } + return false; +} + +bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { if (CallInst *CI = dyn_cast<CallInst>(J)) { + // Inline ASM is okay, unless it clobbers the ctr register. if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { - // Inline ASM is okay, unless it clobbers the ctr register. - InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { - InlineAsm::ConstraintInfo &C = CIV[i]; - if (C.Type != InlineAsm::isInput) - for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) - if (StringRef(C.Codes[j]).equals_lower("{ctr}")) - return true; - } - + if (asmClobbersCTR(IA)) + return true; continue; } - if (!TM) - return true; - const TargetLowering *TLI = - TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); - if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. @@ -380,9 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { } if (Opcode) { - auto &DL = CI->getModule()->getDataLayout(); - MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), - true); + MVT VTy = TLI->getSimpleValueType( + *DL, CI->getArgOperand(0)->getType(), true); if (VTy == MVT::Other) return true; @@ -406,17 +411,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { CastInst *CI = cast<CastInst>(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) + isLargeIntegerTy(!TM->isPPC64(), CI->getSrcTy()->getScalarType()) || + isLargeIntegerTy(!TM->isPPC64(), CI->getDestTy()->getScalarType())) return true; - } else if (isLargeIntegerTy(TT.isArch32Bit(), + } else if (isLargeIntegerTy(!TM->isPPC64(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; - } else if (TT.isArch32Bit() && + } else if (!TM->isPPC64() && isLargeIntegerTy(false, J->getType()->getScalarType()) && (J->getOpcode() == Instruction::Shl || J->getOpcode() == Instruction::AShr || @@ -428,16 +433,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { - if (!TM) - return true; - const TargetLowering *TLI = - TM->getSubtargetImpl(*BB->getParent())->getTargetLowering(); - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) return true; } - if (TM->getSubtargetImpl(*BB->getParent())->getTargetLowering()->useSoftFloat()) { + if (STI->useSoftFloat()) { switch(J->getOpcode()) { case Instruction::FAdd: case Instruction::FSub: @@ -456,7 +456,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { } for (Value *Operand : J->operands()) - if (memAddrUsesCTR(TM, Operand)) + if (memAddrUsesCTR(*TM, Operand)) return true; } @@ -466,11 +466,6 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; - const Triple TT = - Triple(L->getHeader()->getParent()->getParent()->getTargetTriple()); - if (!TT.isArch32Bit() && !TT.isArch64Bit()) - return MadeChange; // Unknown arch. type. - // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); @@ -495,7 +490,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // want to use the counter register if the loop contains calls. for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) - if (mightUseCTR(TT, *I)) + if (mightUseCTR(*I)) return MadeChange; SmallVector<BasicBlock*, 4> ExitingBlocks; @@ -517,7 +512,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { } else if (!SE->isLoopInvariant(EC, L)) continue; - if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32)) + if (SE->getTypeSizeInBits(EC->getType()) > (TM->isPPC64() ? 64 : 32)) continue; // We now have a loop-invariant count of loop iterations (which is not the @@ -571,7 +566,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // preheader, then we can use it (except if the preheader contains a use of // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). - if (!Preheader || mightUseCTR(TT, Preheader)) + if (!Preheader || mightUseCTR(Preheader)) Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; @@ -582,10 +577,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // selected branch. MadeChange = true; - SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt"); + SCEVExpander SCEVE(*SE, *DL, "loopcnt"); LLVMContext &C = SE->getContext(); - Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) : - Type::getInt32Ty(C); + Type *CountType = TM->isPPC64() ? Type::getInt64Ty(C) : Type::getInt32Ty(C); if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index afd2e87078a9..535b9deaefac 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -114,8 +114,8 @@ namespace { unsigned GlobalBaseReg; public: - explicit PPCDAGToDAGISel(PPCTargetMachine &tm) - : SelectionDAGISel(tm), TM(tm) {} + explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm) {} bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary @@ -5116,6 +5116,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { /// createPPCISelDag - This pass converts a legalized DAG into a /// PowerPC-specific DAG, ready for instruction scheduling. /// -FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { - return new PPCDAGToDAGISel(TM); +FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PPCDAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 31c50785c2ee..5f8085f4626e 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -52,6 +52,7 @@ namespace { protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; + bool NeedFence = true; bool Is64Bit = MBB.getParent()->getSubtarget<PPCSubtarget>().isPPC64(); for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); @@ -62,6 +63,16 @@ protected: MI.getOpcode() != PPC::ADDItlsldLADDR && MI.getOpcode() != PPC::ADDItlsgdLADDR32 && MI.getOpcode() != PPC::ADDItlsldLADDR32) { + + // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP + // as scheduling fences, we skip creating fences if we already + // have existing ADJCALLSTACKDOWN/UP to avoid nesting, + // which causes verification error with -verify-machineinstrs. + if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) + NeedFence = false; + else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) + NeedFence = true; + ++I; continue; } @@ -96,11 +107,15 @@ protected: break; } - // Don't really need to save data to the stack - the clobbered + // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr + // as schduling fence to avoid it is scheduled before + // mflr in the prologue and the address in LR is clobbered (PR25839). + // We don't really need to save data to the stack - the clobbered // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) - .addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) + .addImm(0); // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) @@ -116,7 +131,8 @@ protected: .addReg(GPR3)); Call->addOperand(MI.getOperand(3)); - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); + if (NeedFence) + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0); BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg) .addReg(GPR3); diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index a88a6541e8d0..fe092cc3b858 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -93,6 +93,7 @@ extern "C" void LLVMInitializePowerPCTarget() { PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); initializePPCExpandISELPass(PR); + initializePPCTLSDynamicCallPass(PR); } /// Return the datalayout string of a subtarget. @@ -336,7 +337,7 @@ bool PPCPassConfig::addPreISel() { addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) - addPass(createPPCCTRLoops(getPPCTargetMachine())); + addPass(createPPCCTRLoops()); return false; } @@ -352,7 +353,7 @@ bool PPCPassConfig::addILPOpts() { bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - addPass(createPPCISelDag(getPPCTargetMachine())); + addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel())); #ifndef NDEBUG if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 5eb6ba785d1b..2dc3828334ac 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -41,6 +41,7 @@ public: ~PPCTargetMachine() override; const PPCSubtarget *getSubtargetImpl(const Function &F) const override; + const PPCSubtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 3dbd5f5b9a92..6110706b01b9 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -189,7 +189,7 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return PPCTTIImpl::getIntImmCost(Imm, Ty); } -void PPCTTIImpl::getUnrollingPreferences(Loop *L, +void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { if (ST->getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling @@ -201,7 +201,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, UP.AllowExpensiveTripCount = true; } - BaseT::getUnrollingPreferences(L, UP); + BaseT::getUnrollingPreferences(L, SE, UP); } bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 758c335def08..99ca6394d1be 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -52,7 +52,8 @@ public: Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); /// @} diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index c72b47b09085..d4454c271f5a 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -203,13 +203,14 @@ namespace { return InfosBE[Kind - FirstTargetFixupKind]; } - void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, bool &IsResolved) override { + bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target) override { switch ((Sparc::Fixups)Fixup.getKind()) { - default: break; + default: + return false; case Sparc::fixup_sparc_wplt30: if (Target.getSymA()->getSymbol().isTemporary()) - return; + return false; case Sparc::fixup_sparc_tls_gd_hi22: case Sparc::fixup_sparc_tls_gd_lo10: case Sparc::fixup_sparc_tls_gd_add: @@ -227,7 +228,8 @@ namespace { case Sparc::fixup_sparc_tls_ie_ldx: case Sparc::fixup_sparc_tls_ie_add: case Sparc::fixup_sparc_tls_le_hix22: - case Sparc::fixup_sparc_tls_le_lox10: IsResolved = false; break; + case Sparc::fixup_sparc_tls_le_lox10: + return true; } } diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index ad05779a9f64..ee23692ad1db 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -61,6 +61,7 @@ enum RegisterKind { VR64Reg, VR128Reg, AR32Reg, + CR64Reg, }; enum MemoryKind { @@ -343,6 +344,7 @@ public: bool isVF128() const { return false; } bool isVR128() const { return isReg(VR128Reg); } bool isAR32() const { return isReg(AR32Reg); } + bool isCR64() const { return isReg(CR64Reg); } bool isAnyReg() const { return (isReg() || isImm(0, 15)); } bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } @@ -379,7 +381,8 @@ private: RegGR, RegFP, RegV, - RegAR + RegAR, + RegCR }; struct Register { RegisterGroup Group; @@ -487,6 +490,9 @@ public: OperandMatchResultTy parseAR32(OperandVector &Operands) { return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg); } + OperandMatchResultTy parseCR64(OperandVector &Operands) { + return parseRegister(Operands, RegCR, SystemZMC::CR64Regs, CR64Reg); + } OperandMatchResultTy parseAnyReg(OperandVector &Operands) { return parseAnyRegister(Operands); } @@ -648,6 +654,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { Reg.Group = RegV; else if (Prefix == 'a' && Reg.Num < 16) Reg.Group = RegAR; + else if (Prefix == 'c' && Reg.Num < 16) + Reg.Group = RegCR; else return Error(Reg.StartLoc, "invalid register"); @@ -741,6 +749,10 @@ SystemZAsmParser::parseAnyRegister(OperandVector &Operands) { Kind = AR32Reg; RegNo = SystemZMC::AR32Regs[Reg.Num]; } + else if (Reg.Group == RegCR) { + Kind = CR64Reg; + RegNo = SystemZMC::CR64Regs[Reg.Num]; + } else { return MatchOperand_ParseFail; } @@ -1056,6 +1068,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = SystemZMC::VR128Regs[Reg.Num]; else if (Reg.Group == RegAR) RegNo = SystemZMC::AR32Regs[Reg.Num]; + else if (Reg.Group == RegCR) + RegNo = SystemZMC::CR64Regs[Reg.Num]; StartLoc = Reg.StartLoc; EndLoc = Reg.EndLoc; return false; diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 27fd70bc6092..8903b57ffd0b 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -162,6 +162,12 @@ static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16); } +static DecodeStatus DecodeCR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, SystemZMC::CR64Regs, 16); +} + template<unsigned N> static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { if (!isUInt<N>(Imm)) diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index dfea7e33fa15..727ab921daf9 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -116,6 +116,13 @@ const unsigned SystemZMC::AR32Regs[16] = { SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15 }; +const unsigned SystemZMC::CR64Regs[16] = { + SystemZ::C0, SystemZ::C1, SystemZ::C2, SystemZ::C3, + SystemZ::C4, SystemZ::C5, SystemZ::C6, SystemZ::C7, + SystemZ::C8, SystemZ::C9, SystemZ::C10, SystemZ::C11, + SystemZ::C12, SystemZ::C13, SystemZ::C14, SystemZ::C15 +}; + unsigned SystemZMC::getFirstReg(unsigned Reg) { static unsigned Map[SystemZ::NUM_TARGET_REGS]; static bool Initialized = false; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index d9926c7e4986..dbca3485290a 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -55,6 +55,7 @@ extern const unsigned VR32Regs[32]; extern const unsigned VR64Regs[32]; extern const unsigned VR128Regs[32]; extern const unsigned AR32Regs[16]; +extern const unsigned CR64Regs[16]; // Return the 0-based number of the first architectural register that // contains the given LLVM register. E.g. R1D -> 1. diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 74cf653b9d95..9b714157550d 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -67,6 +67,11 @@ We don't use ICM, STCM, or CLM. -- +We don't use ADD (LOGICAL) HIGH, SUBTRACT (LOGICAL) HIGH, +or COMPARE (LOGICAL) HIGH yet. + +-- + DAGCombiner doesn't yet fold truncations of extended loads. Functions like: unsigned long f (unsigned long x, unsigned short *y) diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index c5f324418da5..41300a1b6295 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -56,6 +56,7 @@ include "SystemZInstrVector.td" include "SystemZInstrFP.td" include "SystemZInstrHFP.td" include "SystemZInstrDFP.td" +include "SystemZInstrSystem.td" def SystemZInstrInfo : InstrInfo {} diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td index ffb0b8d1c861..c5faa0d62881 100644 --- a/lib/Target/SystemZ/SystemZFeatures.td +++ b/lib/Target/SystemZ/SystemZFeatures.td @@ -68,11 +68,21 @@ def FeaturePopulationCount : SystemZFeature< "Assume that the population-count facility is installed" >; +def FeatureMessageSecurityAssist3 : SystemZFeature< + "message-security-assist-extension3", "MessageSecurityAssist3", + "Assume that the message-security-assist extension facility 3 is installed" +>; + def FeatureMessageSecurityAssist4 : SystemZFeature< "message-security-assist-extension4", "MessageSecurityAssist4", "Assume that the message-security-assist extension facility 4 is installed" >; +def FeatureResetReferenceBitsMultiple : SystemZFeature< + "reset-reference-bits-multiple", "ResetReferenceBitsMultiple", + "Assume that the reset-reference-bits-multiple facility is installed" +>; + def Arch9NewFeatures : SystemZFeatureList<[ FeatureDistinctOps, FeatureFastSerialization, @@ -81,7 +91,9 @@ def Arch9NewFeatures : SystemZFeatureList<[ FeatureInterlockedAccess1, FeatureLoadStoreOnCond, FeaturePopulationCount, - FeatureMessageSecurityAssist4 + FeatureMessageSecurityAssist3, + FeatureMessageSecurityAssist4, + FeatureResetReferenceBitsMultiple ]>; //===----------------------------------------------------------------------===// @@ -120,13 +132,19 @@ def FeatureDFPZonedConversion : SystemZFeature< "Assume that the DFP zoned-conversion facility is installed" >; +def FeatureEnhancedDAT2 : SystemZFeature< + "enhanced-dat-2", "EnhancedDAT2", + "Assume that the enhanced-DAT facility 2 is installed" +>; + def Arch10NewFeatures : SystemZFeatureList<[ FeatureExecutionHint, FeatureLoadAndTrap, FeatureMiscellaneousExtensions, FeatureProcessorAssist, FeatureTransactionalExecution, - FeatureDFPZonedConversion + FeatureDFPZonedConversion, + FeatureEnhancedDAT2 ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 5f6115ed86a4..7620e06ccbc9 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2468,6 +2468,14 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, let OpType = "reg"; } +class UnaryTiedRRE<string mnemonic, bits<16> opcode, RegisterOperand cls> + : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src), + mnemonic#"\t$R1", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let R2 = 0; +} + class UnaryMemRRFc<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src), @@ -2702,6 +2710,26 @@ class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode, let AddedComplexity = 7; } +class SideEffectBinaryRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []>; + +class SideEffectBinaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let R3 = 0; + let M4 = 0; +} + +class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let M3 = 0; +} + class SideEffectBinaryIE<string mnemonic, bits<16> opcode, Immediate imm1, Immediate imm2> : InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2), @@ -2729,6 +2757,10 @@ class SideEffectBinarySSf<string mnemonic, bits<8> opcode> : InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2), mnemonic##"\t$BD1, $BDL2", []>; +class SideEffectBinarySSE<string mnemonic, bits<16> opcode> + : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2), + mnemonic#"\t$BD1, $BD2", []>; + class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs cls1:$R1, cls2:$R2), (ins cls1:$R1src, cls2:$R2src), @@ -3612,6 +3644,22 @@ class SideEffectTernarySSc<string mnemonic, bits<8> opcode> shift12only:$BD2, imm32zx4:$I3), mnemonic##"\t$BDL1, $BD2, $I3", []>; +class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R2, $R3", []> { + let M4 = 0; +} + +class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3), + mnemonic#"\t$R1, $R3, $R2", []> { + let M4 = 0; +} + class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, @@ -3630,6 +3678,13 @@ class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode, : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3), mnemonic#"\t$R1, $R2, $M3", []>; +multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2> { + def "" : SideEffectTernaryRRFc<mnemonic, opcode, cls1, cls2, imm32zx4>; + def Opt : SideEffectBinaryRRFc<mnemonic, opcode, cls1, cls2>; +} + class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2, Immediate imm> @@ -3720,6 +3775,18 @@ multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, } } +class SideEffectTernaryRS<string mnemonic, bits<8> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSa<opcode, (outs), + (ins cls1:$R1, cls2:$R3, bdaddr12only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []>; + +class SideEffectTernaryRSY<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSYa<opcode, (outs), + (ins cls1:$R1, cls2:$R3, bdaddr20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", []>; + class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRSa<opcode, (outs cls1:$R1, cls2:$R3), @@ -3997,6 +4064,35 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> { VR128:$V4, imm32zx4:$M5, 0)>; } +class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4), + mnemonic#"\t$R1, $R2, $R3, $M4", []>; + +multiclass SideEffectQuaternaryRRFaOptOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def Opt : SideEffectTernaryRRFa<mnemonic, opcode, cls1, cls2, cls3>; + def OptOpt : SideEffectBinaryRRFa<mnemonic, opcode, cls1, cls2>; +} + +class SideEffectQuaternaryRRFb<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb<opcode, (outs), (ins cls1:$R1, cls2:$R2, cls3:$R3, imm32zx4:$M4), + mnemonic#"\t$R1, $R3, $R2, $M4", []>; + +multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> { + def "" : SideEffectQuaternaryRRFb<mnemonic, opcode, cls1, cls2, cls3>; + def Opt : SideEffectTernaryRRFb<mnemonic, opcode, cls1, cls2, cls3>; +} + class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode, RegisterOperand cls> : InstSSe<opcode, (outs), @@ -4012,6 +4108,16 @@ class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, let mayStore = 1; } +class CmpSwapRRE<string mnemonic, bits<16> opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2), + mnemonic#"\t$R1, $R2", []> { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let mayLoad = 1; + let mayStore = 1; +} + class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, AddressingMode mode = bdaddr12only> : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2), diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 9f5e6288348e..98f66c29ae64 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -883,6 +883,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { } def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>; + // Addition to a high register. + def AHHHR : BinaryRRFa<"ahhhr", 0xB9C8, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def AHHLR : BinaryRRFa<"ahhlr", 0xB9D8, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Addition of signed 16-bit immediates. defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>; defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>; @@ -917,6 +923,12 @@ let Defs = [CC] in { } def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>; + // Addition to a high register. + def ALHHHR : BinaryRRFa<"alhhhr", 0xB9CA, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def ALHHLR : BinaryRRFa<"alhhlr", 0xB9DA, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Addition of signed 16-bit immediates. def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>, Requires<[FeatureDistinctOps]>; @@ -927,6 +939,10 @@ let Defs = [CC] in { def ALFI : BinaryRIL<"alfi", 0xC2B, addc, GR32, uimm32>; def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>; + // Addition of signed 32-bit immediates. + def ALSIH : BinaryRIL<"alsih", 0xCCA, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + // Addition of memory. defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; @@ -949,6 +965,10 @@ let Defs = [CC], Uses = [CC] in { def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load, 8>; } +// Addition that does not modify the condition code. +def ALSIHN : BinaryRIL<"alsihn", 0xCCB, null_frag, GRH32, simm32>, + Requires<[FeatureHighWord]>; + //===----------------------------------------------------------------------===// // Subtraction //===----------------------------------------------------------------------===// @@ -961,6 +981,12 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in { def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>; defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>; + // Subtraction from a high register. + def SHHHR : BinaryRRFa<"shhhr", 0xB9C9, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SHHLR : BinaryRRFa<"shhlr", 0xB9D9, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>; defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>; @@ -976,6 +1002,12 @@ let Defs = [CC] in { def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>; defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>; + // Subtraction from a high register. + def SLHHHR : BinaryRRFa<"slhhhr", 0xB9CB, null_frag, GRH32, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def SLHHLR : BinaryRRFa<"slhhlr", 0xB9DB, null_frag, GRH32, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Subtraction of unsigned 32-bit immediates. These don't match // subc because we prefer addc for constants. def SLFI : BinaryRIL<"slfi", 0xC25, null_frag, GR32, uimm32>; @@ -1298,6 +1330,12 @@ let Defs = [CC], CCValues = 0xE in { def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>; def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>; + // Comparison with a high register. + def CHHR : CompareRRE<"chhr", 0xB9CD, null_frag, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def CHLR : CompareRRE<"chlr", 0xB9DD, null_frag, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH, // depending on the choice of register. def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>, @@ -1344,6 +1382,12 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>; def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>; + // Comparison with a high register. + def CLHHR : CompareRRE<"clhhr", 0xB9CF, null_frag, GRH32, GRH32>, + Requires<[FeatureHighWord]>; + def CLHLR : CompareRRE<"clhlr", 0xB9DF, null_frag, GRH32, GR32>, + Requires<[FeatureHighWord]>; + // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI // or CLIH, depending on the choice of register. def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>, @@ -1888,54 +1932,12 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; -// Supervisor call. -let hasSideEffects = 1, isCall = 1, Defs = [CC] in - def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; - -// Monitor call. -let hasSideEffects = 1, isCall = 1 in - def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; - -// Store clock. -let hasSideEffects = 1, Defs = [CC] in { - def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; - def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>; - def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>; -} - -// Store facility list. -let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in - def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; - -// Extract CPU attribute. -let hasSideEffects = 1 in - def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; - -// Extract CPU time. -let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in - def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; - -// Extract PSW. -let hasSideEffects = 1, Uses = [CC] in - def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; - // Execute. let hasSideEffects = 1 in { def EX : SideEffectBinaryRX<"ex", 0x44, GR64>; def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>; } -// Program return. -let hasSideEffects = 1, Defs = [CC] in - def PR : SideEffectInherentE<"pr", 0x0101>; - -// Move with key. -let mayLoad = 1, mayStore = 1, Defs = [CC] in - def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>; - -// Store real address. -def STRAG : StoreSSE<"strag", 0xE502>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZInstrSystem.td b/lib/Target/SystemZ/SystemZInstrSystem.td new file mode 100644 index 000000000000..a9803c2d83e9 --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrSystem.td @@ -0,0 +1,517 @@ +//==- SystemZInstrSystem.td - SystemZ system instructions -*- tblgen-*-----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ system-level instructions. +// Most of these instructions are privileged or semi-privileged. They are +// not used for code generation, but are provided for use with the assembler +// and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Program-Status Word Instructions. +//===----------------------------------------------------------------------===// + +// Extract PSW. +let hasSideEffects = 1, Uses = [CC] in + def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; + +// Load PSW (extended). +let hasSideEffects = 1, Defs = [CC], mayLoad = 1 in { + def LPSW : SideEffectUnaryS<"lpsw", 0x8200, null_frag, 8>; + def LPSWE : SideEffectUnaryS<"lpswe", 0xB2B2, null_frag, 16>; +} + +// Insert PSW key. +let Uses = [R2L], Defs = [R2L] in + def IPK : SideEffectInherentS<"ipk", 0xB20B, null_frag>; + +// Set PSW key from address. +let hasSideEffects = 1 in + def SPKA : SideEffectAddressS<"spka", 0xB20A, null_frag>; + +// Set system mask. +let hasSideEffects = 1, mayLoad = 1 in + def SSM : SideEffectUnaryS<"ssm", 0x8000, null_frag, 1>; + +// Store then AND/OR system mask. +let hasSideEffects = 1 in { + def STNSM : StoreSI<"stnsm", 0xAC, null_frag, imm32zx8>; + def STOSM : StoreSI<"stosm", 0xAD, null_frag, imm32zx8>; +} + +// Insert address space control. +let hasSideEffects = 1 in + def IAC : InherentRRE<"iac", 0xB224, GR32, null_frag>; + +// Set address space control (fast). +let hasSideEffects = 1 in { + def SAC : SideEffectAddressS<"sac", 0xB219, null_frag>; + def SACF : SideEffectAddressS<"sacf", 0xB279, null_frag>; +} + +//===----------------------------------------------------------------------===// +// Control Register Instructions. +//===----------------------------------------------------------------------===// + +// Load control. +def LCTL : LoadMultipleRS<"lctl", 0xB7, CR64>; +def LCTLG : LoadMultipleRSY<"lctlg", 0xEB2F, CR64>; + +// Store control. +def STCTL : StoreMultipleRS<"stctl", 0xB6, CR64>; +def STCTG : StoreMultipleRSY<"stctg", 0xEB25, CR64>; + +// Extract primary ASN (and instance). +let hasSideEffects = 1 in { + def EPAR : InherentRRE<"epar", 0xB226, GR32, null_frag>; + def EPAIR : InherentRRE<"epair", 0xB99A, GR64, null_frag>; +} + +// Extract secondary ASN (and instance). +let hasSideEffects = 1 in { + def ESAR : InherentRRE<"esar", 0xB227, GR32, null_frag>; + def ESAIR : InherentRRE<"esair", 0xB99B, GR64, null_frag>; +} + +// Set secondary ASN (and instance). +let hasSideEffects = 1 in { + def SSAR : SideEffectUnaryRRE<"ssar", 0xB225, GR32, null_frag>; + def SSAIR : SideEffectUnaryRRE<"ssair", 0xB99F, GR64, null_frag>; +} + +// Extract and set extended authority. +let hasSideEffects = 1 in + def ESEA : UnaryTiedRRE<"esea", 0xB99D, GR32>; + +//===----------------------------------------------------------------------===// +// Prefix-Register Instructions. +//===----------------------------------------------------------------------===// + +// Set prefix. +let hasSideEffects = 1 in + def SPX : SideEffectUnaryS<"spx", 0xB210, null_frag, 4>; + +// Store prefix. +let hasSideEffects = 1 in + def STPX : StoreInherentS<"stpx", 0xB211, null_frag, 4>; + +//===----------------------------------------------------------------------===// +// Storage-Key and Real Memory Instructions. +//===----------------------------------------------------------------------===// + +// Insert storage key extended. +let hasSideEffects = 1 in + def ISKE : BinaryRRE<"iske", 0xB229, null_frag, GR32, GR64>; + +// Insert virtual storage key. +let hasSideEffects = 1 in + def IVSK : BinaryRRE<"ivsk", 0xB223, null_frag, GR32, GR64>; + +// Set storage key extended. +let hasSideEffects = 1, Defs = [CC] in + defm SSKE : SideEffectTernaryRRFcOpt<"sske", 0xB22B, GR32, GR64>; + +// Reset reference bit extended. +let hasSideEffects = 1, Defs = [CC] in + def RRBE : SideEffectBinaryRRE<"rrbe", 0xB22A, GR32, GR64>; + +// Reset reference bits multiple. +let Predicates = [FeatureResetReferenceBitsMultiple], hasSideEffects = 1 in + def RRBM : UnaryRRE<"rrbm", 0xB9AE, null_frag, GR64, GR64>; + +// Perform frame management function. +let hasSideEffects = 1 in + def PFMF : SideEffectBinaryMemRRE<"pfmf", 0xB9AF, GR32, GR64>; + +// Test block. +let hasSideEffects = 1, mayStore = 1, Uses = [R0D], Defs = [R0D, CC] in + def TB : SideEffectBinaryRRE<"tb", 0xB22C, GR64, GR64>; + +// Page in / out. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def PGIN : SideEffectBinaryRRE<"pgin", 0xB22E, GR64, GR64>; + def PGOUT : SideEffectBinaryRRE<"pgout", 0xB22F, GR64, GR64>; +} + +//===----------------------------------------------------------------------===// +// Dynamic-Address-Translation Instructions. +//===----------------------------------------------------------------------===// + +// Invalidate page table entry. +let hasSideEffects = 1 in + defm IPTE : SideEffectQuaternaryRRFaOptOpt<"ipte", 0xB221, GR64, GR32, GR32>; + +// Invalidate DAT table entry. +let hasSideEffects = 1 in + defm IDTE : SideEffectQuaternaryRRFbOpt<"idte", 0xB98E, GR64, GR64, GR64>; + +// Compare and replace DAT table entry. +let Predicates = [FeatureEnhancedDAT2], hasSideEffects = 1, Defs = [CC] in + defm CRDTE : SideEffectQuaternaryRRFbOpt<"crdte", 0xB98F, GR128, GR128, GR64>; + +// Purge TLB. +let hasSideEffects = 1 in + def PTLB : SideEffectInherentS<"ptlb", 0xB20D, null_frag>; + +// Compare and swap and purge. +let hasSideEffects = 1, Defs = [CC] in { + def CSP : CmpSwapRRE<"csp", 0xB250, GR128, GR64>; + def CSPG : CmpSwapRRE<"cspg", 0xB98A, GR128, GR64>; +} + +// Load page-table-entry address. +let hasSideEffects = 1, Defs = [CC] in + def LPTEA : TernaryRRFb<"lptea", 0xB9AA, GR64, GR64, GR64>; + +// Load real address. +let hasSideEffects = 1, Defs = [CC] in { + defm LRA : LoadAddressRXPair<"lra", 0xB1, 0xE313, null_frag>; + def LRAG : LoadAddressRXY<"lrag", 0xE303, null_frag, laaddr20pair>; +} + +// Store real address. +def STRAG : StoreSSE<"strag", 0xE502>; + +// Load using real address. +let mayLoad = 1 in { + def LURA : UnaryRRE<"lura", 0xB24B, null_frag, GR32, GR64>; + def LURAG : UnaryRRE<"lurag", 0xB905, null_frag, GR64, GR64>; +} + +// Store using real address. +let mayStore = 1 in { + def STURA : SideEffectBinaryRRE<"stura", 0xB246, GR32, GR64>; + def STURG : SideEffectBinaryRRE<"sturg", 0xB925, GR64, GR64>; +} + +// Test protection. +let hasSideEffects = 1, Defs = [CC] in + def TPROT : SideEffectBinarySSE<"tprot", 0xE501>; + +//===----------------------------------------------------------------------===// +// Memory-move Instructions. +//===----------------------------------------------------------------------===// + +// Move with key. +let mayLoad = 1, mayStore = 1, Defs = [CC] in + def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>; + +// Move to primary / secondary. +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def MVCP : MemoryBinarySSd<"mvcp", 0xDA, GR64>; + def MVCS : MemoryBinarySSd<"mvcs", 0xDB, GR64>; +} + +// Move with source / destination key. +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1L] in { + def MVCSK : SideEffectBinarySSE<"mvcsk", 0xE50E>; + def MVCDK : SideEffectBinarySSE<"mvcdk", 0xE50F>; +} + +// Move with optional specifications. +let mayLoad = 1, mayStore = 1, Uses = [R0L] in + def MVCOS : SideEffectTernarySSF<"mvcos", 0xC80, GR64>; + +// Move page. +let mayLoad = 1, mayStore = 1, Uses = [R0L], Defs = [CC] in + def MVPG : SideEffectBinaryRRE<"mvpg", 0xB254, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// Address-Space Instructions. +//===----------------------------------------------------------------------===// + +// Load address space parameters. +let hasSideEffects = 1, Defs = [CC] in + def LASP : SideEffectBinarySSE<"lasp", 0xE500>; + +// Purge ALB. +let hasSideEffects = 1 in + def PALB : SideEffectInherentRRE<"palb", 0xB248>; + +// Program call. +let hasSideEffects = 1 in + def PC : SideEffectAddressS<"pc", 0xB218, null_frag>; + +// Program return. +let hasSideEffects = 1, Defs = [CC] in + def PR : SideEffectInherentE<"pr", 0x0101>; + +// Program transfer (with instance). +let hasSideEffects = 1 in { + def PT : SideEffectBinaryRRE<"pt", 0xB228, GR32, GR64>; + def PTI : SideEffectBinaryRRE<"pti", 0xB99E, GR64, GR64>; +} + +// Resume program. +let hasSideEffects = 1, Defs = [CC] in + def RP : SideEffectAddressS<"rp", 0xB277, null_frag>; + +// Branch in subspace group. +let hasSideEffects = 1 in + def BSG : UnaryRRE<"bsg", 0xB258, null_frag, GR64, GR64>; + +// Branch and set authority. +let hasSideEffects = 1 in + def BSA : UnaryRRE<"bsa", 0xB25A, null_frag, GR64, GR64>; + +// Test access. +let Defs = [CC] in + def TAR : SideEffectBinaryRRE<"tar", 0xB24C, AR32, GR32>; + +//===----------------------------------------------------------------------===// +// Linkage-Stack Instructions. +//===----------------------------------------------------------------------===// + +// Branch and stack. +let hasSideEffects = 1 in + def BAKR : SideEffectBinaryRRE<"bakr", 0xB240, GR64, GR64>; + +// Extract stacked registers. +let hasSideEffects = 1 in { + def EREG : SideEffectBinaryRRE<"ereg", 0xB249, GR32, GR32>; + def EREGG : SideEffectBinaryRRE<"eregg", 0xB90E, GR64, GR64>; +} + +// Extract stacked state. +let hasSideEffects = 1, Defs = [CC] in + def ESTA : UnaryRRE<"esta", 0xB24A, null_frag, GR128, GR32>; + +// Modify stacked state. +let hasSideEffects = 1 in + def MSTA : SideEffectUnaryRRE<"msta", 0xB247, GR128, null_frag>; + +//===----------------------------------------------------------------------===// +// Time-Related Instructions. +//===----------------------------------------------------------------------===// + +// Perform timing facility function. +let hasSideEffects = 1, mayLoad = 1, Uses = [R0L, R1D], Defs = [CC] in + def PTFF : SideEffectInherentE<"ptff", 0x0104>; + +// Set clock. +let hasSideEffects = 1, Defs = [CC] in + def SCK : SideEffectUnaryS<"sck", 0xB204, null_frag, 8>; + +// Set clock programmable field. +let hasSideEffects = 1, Uses = [R0L] in + def SCKPF : SideEffectInherentE<"sckpf", 0x0107>; + +// Set clock comparator. +let hasSideEffects = 1 in + def SCKC : SideEffectUnaryS<"sckc", 0xB206, null_frag, 8>; + +// Set CPU timer. +let hasSideEffects = 1 in + def SPT : SideEffectUnaryS<"spt", 0xB208, null_frag, 8>; + +// Store clock (fast / extended). +let hasSideEffects = 1, Defs = [CC] in { + def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; + def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>; + def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>; +} + +// Store clock comparator. +let hasSideEffects = 1 in + def STCKC : StoreInherentS<"stckc", 0xB207, null_frag, 8>; + +// Store CPU timer. +let hasSideEffects = 1 in + def STPT : StoreInherentS<"stpt", 0xB209, null_frag, 8>; + +//===----------------------------------------------------------------------===// +// CPU-Related Instructions. +//===----------------------------------------------------------------------===// + +// Store CPU address. +let hasSideEffects = 1 in + def STAP : StoreInherentS<"stap", 0xB212, null_frag, 2>; + +// Store CPU ID. +let hasSideEffects = 1 in + def STIDP : StoreInherentS<"stidp", 0xB202, null_frag, 8>; + +// Store system information. +let hasSideEffects = 1, Uses = [R0L, R1L], Defs = [R0L, CC] in + def STSI : StoreInherentS<"stsi", 0xB27D, null_frag, 0>; + +// Store facility list. +let hasSideEffects = 1 in + def STFL : StoreInherentS<"stfl", 0xB2B1, null_frag, 4>; + +// Store facility list extended. +let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in + def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; + +// Extract CPU attribute. +let hasSideEffects = 1 in + def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; + +// Extract CPU time. +let hasSideEffects = 1, mayLoad = 1, Defs = [R0D, R1D] in + def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; + +// Perform topology function. +let hasSideEffects = 1 in + def PTF : UnaryTiedRRE<"ptf", 0xB9A2, GR64>; + +// Perform cryptographic key management operation. +let Predicates = [FeatureMessageSecurityAssist3], + hasSideEffects = 1, Uses = [R0L, R1D] in + def PCKMO : SideEffectInherentRRE<"pckmo", 0xB928>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +//===----------------------------------------------------------------------===// + +// Supervisor call. +let hasSideEffects = 1, isCall = 1, Defs = [CC] in + def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; + +// Monitor call. +let hasSideEffects = 1, isCall = 1 in + def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; + +// Diagnose. +let hasSideEffects = 1, isCall = 1 in + def DIAG : SideEffectTernaryRS<"diag", 0x83, GR32, GR32>; + +// Trace. +let hasSideEffects = 1, mayLoad = 1 in { + def TRACE : SideEffectTernaryRS<"trace", 0x99, GR32, GR32>; + def TRACG : SideEffectTernaryRSY<"tracg", 0xEB0F, GR64, GR64>; +} + +// Trap. +let hasSideEffects = 1 in { + def TRAP2 : SideEffectInherentE<"trap2", 0x01FF>; + def TRAP4 : SideEffectAddressS<"trap4", 0xB2FF, null_frag>; +} + +// Signal processor. +let hasSideEffects = 1, Defs = [CC] in + def SIGP : SideEffectTernaryRS<"sigp", 0xAE, GR64, GR64>; + +// Signal adapter. +let hasSideEffects = 1, Uses = [R0D, R1D, R2D, R3D], Defs = [CC] in + def SIGA : SideEffectAddressS<"siga", 0xB274, null_frag>; + +// Start interpretive execution. +let hasSideEffects = 1, Defs = [CC] in + def SIE : SideEffectUnaryS<"sie", 0xB214, null_frag, 0>; + +//===----------------------------------------------------------------------===// +// CPU-Measurement Facility Instructions (SA23-2260). +//===----------------------------------------------------------------------===// + +// Load program parameter +let hasSideEffects = 1 in + def LPP : SideEffectUnaryS<"lpp", 0xB280, null_frag, 8>; + +// Extract coprocessor-group address. +let hasSideEffects = 1, Defs = [CC] in + def ECPGA : UnaryRRE<"ecpga", 0xB2ED, null_frag, GR32, GR64>; + +// Extract CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def ECCTR : UnaryRRE<"ecctr", 0xB2E4, null_frag, GR64, GR64>; + +// Extract peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def EPCTR : UnaryRRE<"epctr", 0xB2E5, null_frag, GR64, GR64>; + +// Load CPU-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LCCTL : SideEffectUnaryS<"lcctl", 0xB284, null_frag, 8>; + +// Load peripheral-counter-set controls. +let hasSideEffects = 1, Defs = [CC] in + def LPCTL : SideEffectUnaryS<"lpctl", 0xB285, null_frag, 8>; + +// Load sampling controls. +let hasSideEffects = 1, Defs = [CC] in + def LSCTL : SideEffectUnaryS<"lsctl", 0xB287, null_frag, 0>; + +// Query sampling information. +let hasSideEffects = 1 in + def QSI : StoreInherentS<"qsi", 0xB286, null_frag, 0>; + +// Query counter information. +let hasSideEffects = 1 in + def QCTRI : StoreInherentS<"qctri", 0xB28E, null_frag, 0>; + +// Set CPU counter. +let hasSideEffects = 1, Defs = [CC] in + def SCCTR : SideEffectBinaryRRE<"scctr", 0xB2E0, GR64, GR64>; + +// Set peripheral counter. +let hasSideEffects = 1, Defs = [CC] in + def SPCTR : SideEffectBinaryRRE<"spctr", 0xB2E1, GR64, GR64>; + +//===----------------------------------------------------------------------===// +// I/O Instructions (Principles of Operation, Chapter 14). +//===----------------------------------------------------------------------===// + +// Clear subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def CSCH : SideEffectInherentS<"csch", 0xB230, null_frag>; + +// Halt subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def HSCH : SideEffectInherentS<"hsch", 0xB231, null_frag>; + +// Modify subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def MSCH : SideEffectUnaryS<"msch", 0xB232, null_frag, 0>; + +// Resume subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def RSCH : SideEffectInherentS<"rsch", 0xB238, null_frag>; + +// Start subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def SSCH : SideEffectUnaryS<"ssch", 0xB233, null_frag, 0>; + +// Store subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def STSCH : StoreInherentS<"stsch", 0xB234, null_frag, 0>; + +// Test subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def TSCH : StoreInherentS<"tsch", 0xB235, null_frag, 0>; + +// Cancel subchannel. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def XSCH : SideEffectInherentS<"xsch", 0xB276, null_frag>; + +// Reset channel path. +let hasSideEffects = 1, Uses = [R1L], Defs = [CC] in + def RCHP : SideEffectInherentS<"rchp", 0xB23B, null_frag>; + +// Set channel monitor. +let hasSideEffects = 1, mayLoad = 1, Uses = [R1L, R2D] in + def SCHM : SideEffectInherentS<"schm", 0xB23C, null_frag>; + +// Store channel path status. +let hasSideEffects = 1 in + def STCPS : StoreInherentS<"stcps", 0xB23A, null_frag, 0>; + +// Store channel report word. +let hasSideEffects = 1, Defs = [CC] in + def STCRW : StoreInherentS<"stcrw", 0xB239, null_frag, 0>; + +// Test pending interruption. +let hasSideEffects = 1, Defs = [CC] in + def TPI : StoreInherentS<"tpi", 0xB236, null_frag, 0>; + +// Set address limit. +let hasSideEffects = 1, Uses = [R1L] in + def SAL : SideEffectInherentS<"sal", 0xB237, null_frag>; + diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 47d2f75cc11a..36809ea81dc1 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -304,3 +304,13 @@ foreach I = 0-15 in { defm AR32 : SystemZRegClass<"AR32", [i32], 32, (add (sequence "A%u", 0, 15)), 0>; +// Control registers. +class CREG64<bits<16> num, string n> : SystemZReg<n> { + let HWEncoding = num; +} +foreach I = 0-15 in { + def C#I : CREG64<I, "c"#I>, DwarfRegNum<[!add(I, 32)]>; +} +defm CR64 : SystemZRegClass<"CR64", [i64], 64, + (add (sequence "C%u", 0, 15)), 0>; + diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td index 5f5f2f690e58..adc9f2976f87 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -353,6 +353,9 @@ def : InstRW<[FXa], (instregex "ALGF(I|R)$")>; def : InstRW<[FXa], (instregex "ALGR(K)?$")>; def : InstRW<[FXa], (instregex "ALR(K)?$")>; def : InstRW<[FXa], (instregex "AR(K)?$")>; +def : InstRW<[FXa], (instregex "A(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXa], (instregex "ALSIH(N)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>; // Logical addition with carry @@ -376,6 +379,8 @@ def : InstRW<[FXa], (instregex "SLGF(I|R)$")>; def : InstRW<[FXa], (instregex "SLGR(K)?$")>; def : InstRW<[FXa], (instregex "SLR(K)?$")>; def : InstRW<[FXa], (instregex "SR(K)?$")>; +def : InstRW<[FXa], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXa, Lat2], (instregex "S(L)?HHLR$")>; // Subtraction with borrow def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>; @@ -506,6 +511,8 @@ def : InstRW<[FXb], (instregex "CLIH$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>; def : InstRW<[FXb], (instregex "CLR$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXb], (instregex "C(L)?HHR$")>; +def : InstRW<[FXb, Lat2], (instregex "C(L)?HLR$")>; // Compare halfword def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>; @@ -701,38 +708,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; -// Move with key -def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>; - -// Monitor call -def : InstRW<[FXb], (instregex "MC$")>; - -// Extract CPU attribute -def : InstRW<[FXb, Lat30], (instregex "ECAG$")>; - -// Extract CPU Time -def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>; - -// Extract PSW -def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; - // Execute def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; -// Program return -def : InstRW<[FXb, Lat30], (instregex "PR$")>; - -// Inline assembly -def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], - (instregex "STCK(F)?$")>; -def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], - (instregex "STCKE$")>; -def : InstRW<[FXa, LSU, Lat5], (instregex "STFLE$")>; -def : InstRW<[FXb, Lat30], (instregex "SVC$")>; - -// Store real address -def : InstRW<[FXb, LSU, Lat5], (instregex "STRAG$")>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// @@ -1364,5 +1342,162 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>; def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>; def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXa, Lat3], (instregex "IPK$")>; +def : InstRW<[LSU], (instregex "SPKA$")>; +def : InstRW<[LSU], (instregex "SSM$")>; +def : InstRW<[FXb], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXb, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXb, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXb, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXb, Lat30], (instregex "PFMF$")>; +def : InstRW<[FXb, Lat30], (instregex "TB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[FXb, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXb, Lat30], (instregex "PR$")>; +def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXb, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXb, Lat20], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>; +def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], + (instregex "STCK(F)?$")>; +def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], + (instregex "STCKE$")>; +def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, LSU, Lat30], (instregex "STAP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXb, Lat30], (instregex "PTF$")>; +def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "SVC$")>; +def : InstRW<[FXb], (instregex "MC$")>; +def : InstRW<[FXb, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXb], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXb, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb], (instregex "LPP$")>; +def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXb, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXb, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXb, Lat30], (instregex "SAL$")>; + } diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td index 126eac2e2072..128049a09086 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -310,6 +310,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>; def : InstRW<[FXU], (instregex "ALGR(K)?$")>; def : InstRW<[FXU], (instregex "ALR(K)?$")>; def : InstRW<[FXU], (instregex "AR(K)?$")>; +def : InstRW<[FXU], (instregex "A(L)?HHHR$")>; +def : InstRW<[FXU, FXU, Lat3], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXU], (instregex "ALSIH(N)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>; // Logical addition with carry @@ -333,6 +336,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>; def : InstRW<[FXU], (instregex "SLGR(K)?$")>; def : InstRW<[FXU], (instregex "SLR(K)?$")>; def : InstRW<[FXU], (instregex "SR(K)?$")>; +def : InstRW<[FXU], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXU, FXU, Lat3], (instregex "S(L)?HHLR$")>; // Subtraction with borrow def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>; @@ -468,6 +473,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>; def : InstRW<[FXU], (instregex "CLR$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXU], (instregex "C(L)?HHR$")>; +def : InstRW<[FXU, FXU, Lat3], (instregex "C(L)?HLR$")>; // Compare halfword def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>; @@ -634,37 +641,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; -// Move with key -def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>; - -// Monitor call -def : InstRW<[FXU], (instregex "MC$")>; - -// Extract CPU attribute -def : InstRW<[FXU, Lat30], (instregex "ECAG$")>; - -// Extract CPU Time -def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>; - -// Extract PSW -def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; - // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; -// Program return -def : InstRW<[FXU, Lat30], (instregex "PR$")>; - -// Inline assembly -def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>; -def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>; -def : InstRW<[LSU, FXU, Lat5], (instregex "STCKE$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>; -def : InstRW<[FXU, Lat30], (instregex "SVC$")>; - -// Store real address -def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// @@ -1058,5 +1037,160 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXU, Lat3], (instregex "IPK$")>; +def : InstRW<[LSU], (instregex "SPKA$")>; +def : InstRW<[LSU], (instregex "SSM$")>; +def : InstRW<[FXU], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXU, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXU, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXU, Lat30], (instregex "PFMF$")>; +def : InstRW<[FXU, Lat30], (instregex "TB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[LSU, Lat30], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXU, Lat30], (instregex "PR$")>; +def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXU, Lat20], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>; +def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>; +def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STCKE$")>; +def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXU, Lat30], (instregex "PTF$")>; +def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "SVC$")>; +def : InstRW<[FXU], (instregex "MC$")>; +def : InstRW<[FXU, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXU], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXU, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU], (instregex "LPP$")>; +def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXU, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXU, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXU, Lat30], (instregex "SAL$")>; + } diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td index d38ca64d2e9b..76b378454631 100644 --- a/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -320,6 +320,9 @@ def : InstRW<[FXU], (instregex "ALGF(I|R)$")>; def : InstRW<[FXU], (instregex "ALGR(K)?$")>; def : InstRW<[FXU], (instregex "ALR(K)?$")>; def : InstRW<[FXU], (instregex "AR(K)?$")>; +def : InstRW<[FXU], (instregex "A(L)?HHHR$")>; +def : InstRW<[FXU, Lat2], (instregex "A(L)?HHLR$")>; +def : InstRW<[FXU], (instregex "ALSIH(N)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>; // Logical addition with carry @@ -343,6 +346,8 @@ def : InstRW<[FXU], (instregex "SLGF(I|R)$")>; def : InstRW<[FXU], (instregex "SLGR(K)?$")>; def : InstRW<[FXU], (instregex "SLR(K)?$")>; def : InstRW<[FXU], (instregex "SR(K)?$")>; +def : InstRW<[FXU], (instregex "S(L)?HHHR$")>; +def : InstRW<[FXU, Lat2], (instregex "S(L)?HHLR$")>; // Subtraction with borrow def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>; @@ -478,6 +483,8 @@ def : InstRW<[FXU], (instregex "CLIH$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>; def : InstRW<[FXU], (instregex "CLR$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>; +def : InstRW<[FXU], (instregex "C(L)?HHR$")>; +def : InstRW<[FXU, Lat2], (instregex "C(L)?HLR$")>; // Compare halfword def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>; @@ -672,37 +679,9 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; -// Move with key -def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>; - -// Monitor call -def : InstRW<[FXU], (instregex "MC$")>; - -// Extract CPU attribute -def : InstRW<[FXU, Lat30], (instregex "ECAG$")>; - -// Extract CPU Time -def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>; - -// Extract PSW -def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; - // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; -// Program return -def : InstRW<[FXU, Lat30], (instregex "PR$")>; - -// Inline assembly -def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>; -def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone], - (instregex "STCKE$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>; -def : InstRW<[FXU, Lat30], (instregex "SVC$")>; - -// Store real address -def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>; - //===----------------------------------------------------------------------===// // .insn directive instructions //===----------------------------------------------------------------------===// @@ -1102,5 +1081,161 @@ def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + +// -------------------------------- System ---------------------------------- // + +//===----------------------------------------------------------------------===// +// System: Program-Status Word Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPSW(E)?$")>; +def : InstRW<[FXU, Lat3], (instregex "IPK$")>; +def : InstRW<[LSU], (instregex "SPKA$")>; +def : InstRW<[LSU], (instregex "SSM$")>; +def : InstRW<[FXU], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXU, Lat3], (instregex "IAC$")>; +def : InstRW<[LSU], (instregex "SAC(F)?$")>; + +//===----------------------------------------------------------------------===// +// System: Control Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LCTL(G)?$")>; +def : InstRW<[LSU, Lat30], (instregex "STCT(L|G)$")>; +def : InstRW<[LSU], (instregex "E(P|S)A(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "SSA(I)?R$")>; +def : InstRW<[FXU, Lat30], (instregex "ESEA$")>; + +//===----------------------------------------------------------------------===// +// System: Prefix-Register Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "SPX$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STPX$")>; + +//===----------------------------------------------------------------------===// +// System: Storage-Key and Real Memory Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "ISKE$")>; +def : InstRW<[FXU, Lat30], (instregex "IVSK$")>; +def : InstRW<[FXU, Lat30], (instregex "SSKE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "RRB(E|M)$")>; +def : InstRW<[FXU, Lat30], (instregex "PFMF$")>; +def : InstRW<[FXU, Lat30], (instregex "TB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGIN$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PGOUT$")>; + +//===----------------------------------------------------------------------===// +// System: Dynamic-Address-Translation Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "IPTE(Opt)?(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "IDTE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "CRDTE(Opt)?$")>; +def : InstRW<[FXU, Lat30], (instregex "PTLB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "CSP(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LPTEA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LRA(Y|G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STRAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "LURA(G)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STUR(A|G)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPROT$")>; + +//===----------------------------------------------------------------------===// +// System: Memory-move Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; +def : InstRW<[LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "MVCOS$")>; +def : InstRW<[LSU, Lat30], (instregex "MVPG$")>; + +//===----------------------------------------------------------------------===// +// System: Address-Space Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "LASP$")>; +def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "PC$")>; +def : InstRW<[FXU, Lat30], (instregex "PR$")>; +def : InstRW<[FXU, Lat30], (instregex "PT(I)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "RP$")>; +def : InstRW<[FXU, Lat30], (instregex "BS(G|A)$")>; +def : InstRW<[FXU, Lat20], (instregex "TAR$")>; + +//===----------------------------------------------------------------------===// +// System: Linkage-Stack Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXU, Lat30], (instregex "EREG(G)?$")>; +def : InstRW<[FXU, Lat30], (instregex "(E|M)STA$")>; + +//===----------------------------------------------------------------------===// +// System: Time-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "PTFF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCK$")>; +def : InstRW<[FXU, Lat30], (instregex "SCKPF$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SCKC$")>; +def : InstRW<[FXU, LSU, Lat20], (instregex "SPT$")>; +def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>; +def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone], + (instregex "STCKE$")>; +def : InstRW<[FXU, LSU, Lat9], (instregex "STCKC$")>; +def : InstRW<[FXU, LSU, Lat8], (instregex "STPT$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Related Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, LSU, Lat30], (instregex "STAP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STIDP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STSI$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STFL(E)?$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECAG$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXU, Lat30], (instregex "PTF$")>; +def : InstRW<[FXU, Lat30], (instregex "PCKMO$")>; + +//===----------------------------------------------------------------------===// +// System: Miscellaneous Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "SVC$")>; +def : InstRW<[FXU], (instregex "MC$")>; +def : InstRW<[FXU, Lat30], (instregex "DIAG$")>; +def : InstRW<[FXU], (instregex "TRAC(E|G)$")>; +def : InstRW<[FXU, Lat30], (instregex "TRAP(2|4)$")>; +def : InstRW<[FXU, Lat30], (instregex "SIGP$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIGA$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "SIE$")>; + +//===----------------------------------------------------------------------===// +// System: CPU-Measurement Facility Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU], (instregex "LPP$")>; +def : InstRW<[FXU, Lat30], (instregex "ECPGA$")>; +def : InstRW<[FXU, Lat30], (instregex "E(C|P)CTR$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "Q(S|CTR)I$")>; +def : InstRW<[FXU, Lat30], (instregex "S(C|P)CTR$")>; + +//===----------------------------------------------------------------------===// +// System: I/O Instructions +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30], (instregex "(C|H|R|X)SCH$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "(M|S|ST|T)SCH$")>; +def : InstRW<[FXU, Lat30], (instregex "RCHP$")>; +def : InstRW<[FXU, Lat30], (instregex "SCHM$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "STC(PS|RW)$")>; +def : InstRW<[FXU, LSU, Lat30], (instregex "TPI$")>; +def : InstRW<[FXU, Lat30], (instregex "SAL$")>; + } diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 0ab0c2f25915..eb4a0962f7eb 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -37,12 +37,13 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasPopulationCount(false), HasMessageSecurityAssist4(false), + HasPopulationCount(false), HasMessageSecurityAssist3(false), + HasMessageSecurityAssist4(false), HasResetReferenceBitsMultiple(false), HasFastSerialization(false), HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), HasExecutionHint(false), HasLoadAndTrap(false), HasTransactionalExecution(false), HasProcessorAssist(false), - HasDFPZonedConversion(false), + HasDFPZonedConversion(false), HasEnhancedDAT2(false), HasVector(false), HasLoadStoreOnCond2(false), HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false), HasDFPPackedConversion(false), diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index be480f03c572..b05a1bb6cafd 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -39,7 +39,9 @@ protected: bool HasHighWord; bool HasFPExtension; bool HasPopulationCount; + bool HasMessageSecurityAssist3; bool HasMessageSecurityAssist4; + bool HasResetReferenceBitsMultiple; bool HasFastSerialization; bool HasInterlockedAccess1; bool HasMiscellaneousExtensions; @@ -48,6 +50,7 @@ protected: bool HasTransactionalExecution; bool HasProcessorAssist; bool HasDFPZonedConversion; + bool HasEnhancedDAT2; bool HasVector; bool HasLoadStoreOnCond2; bool HasLoadAndZeroRightmostByte; @@ -109,9 +112,18 @@ public: bool hasPopulationCount() const { return HasPopulationCount; } // Return true if the target has the message-security-assist + // extension facility 3. + bool hasMessageSecurityAssist3() const { return HasMessageSecurityAssist3; } + + // Return true if the target has the message-security-assist // extension facility 4. bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; } + // Return true if the target has the reset-reference-bits-multiple facility. + bool hasResetReferenceBitsMultiple() const { + return HasResetReferenceBitsMultiple; + } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } @@ -138,6 +150,9 @@ public: // Return true if the target has the DFP zoned-conversion facility. bool hasDFPZonedConversion() const { return HasDFPZonedConversion; } + // Return true if the target has the enhanced-DAT facility 2. + bool hasEnhancedDAT2() const { return HasEnhancedDAT2; } + // Return true if the target has the load-and-zero-rightmost-byte facility. bool hasLoadAndZeroRightmostByte() const { return HasLoadAndZeroRightmostByte; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 422c16b8eb62..ce5c57e0f519 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -238,7 +238,7 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -void SystemZTTIImpl::getUnrollingPreferences(Loop *L, +void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { // Find out if L contains a call, what the machine instruction count // estimate is, and how many stores there are. diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index bdba7601eb78..6923fc6fc910 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -45,7 +45,8 @@ public: TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); - void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); /// @} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 39cb1ca336f2..129794171464 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -57,17 +57,19 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), } } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 -// Placemarkers to indicate the start or end of a block or loop scope. These -// use/clobber VALUE_STACK to prevent them from being moved into the middle of -// an expression tree. +// Placemarkers to indicate the start or end of a block, loop, or try scope. +// These use/clobber VALUE_STACK to prevent them from being moved into the +// middle of an expression tree. let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in { def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>; def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>; +def TRY : I<(outs), (ins Signature:$sig), [], "try \t$sig", 0x06>; -// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode -// in wasm. +// END_BLOCK, END_LOOP, END_TRY, and END_FUNCTION are represented with the same +// opcode in wasm. def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>; def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>; +def END_TRY : I<(outs), (ins), [], "end_try", 0x0b>; let isTerminator = 1, isBarrier = 1 in def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] @@ -112,6 +114,20 @@ let isReturn = 1 in { def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>; +def THROW_I32 : I<(outs), (ins i32imm:$tag, I32:$obj), + [(int_wasm_throw imm:$tag, I32:$obj)], "throw \t$tag, $obj", + 0x08>; +def THROW_I64 : I<(outs), (ins i32imm:$tag, I64:$obj), + [(int_wasm_throw imm:$tag, I64:$obj)], "throw \t$tag, $obj", + 0x08>; +def RETHROW : I<(outs), (ins i32imm:$rel_depth), [], "rethrow \t$rel_depth", + 0x09>; + } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 } // Defs = [ARGUMENTS] + +// rethrow takes a relative depth as an argument, for which currently only 0 is +// possible for C++. Once other languages need depths other than 0, depths will +// be computed in CFGStackify. +def : Pat<(int_wasm_rethrow), (RETHROW 0)>; diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index 947c0329bb6e..f0b6a3e35dba 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -897,7 +897,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { } } - // Look for orphan landingpads, can occur in blocks with no predecesors + // Look for orphan landingpads, can occur in blocks with no predecessors for (BasicBlock &BB : F) { Instruction *I = BB.getFirstNonPHI(); if (auto *LPI = dyn_cast<LandingPadInst>(I)) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d30cc724c203..825f23dc52d9 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -49,8 +49,11 @@ static const char OpPrecedence[] = { 4, // IC_MINUS 5, // IC_MULTIPLY 5, // IC_DIVIDE - 6, // IC_RPAREN - 7, // IC_LPAREN + 5, // IC_MOD + 6, // IC_NOT + 7, // IC_NEG + 8, // IC_RPAREN + 9, // IC_LPAREN 0, // IC_IMM 0 // IC_REGISTER }; @@ -92,6 +95,9 @@ private: IC_MINUS, IC_MULTIPLY, IC_DIVIDE, + IC_MOD, + IC_NOT, + IC_NEG, IC_RPAREN, IC_LPAREN, IC_IMM, @@ -111,6 +117,10 @@ private: SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; SmallVector<ICToken, 4> PostfixStack; + bool isUnaryOperator(const InfixCalculatorTok Op) { + return Op == IC_NEG || Op == IC_NOT; + } + public: int64_t popOperand() { assert (!PostfixStack.empty() && "Poped an empty stack!"); @@ -192,6 +202,22 @@ private: ICToken Op = PostfixStack[i]; if (Op.first == IC_IMM || Op.first == IC_REGISTER) { OperandStack.push_back(Op); + } else if (isUnaryOperator(Op.first)) { + assert (OperandStack.size() > 0 && "Too few operands."); + ICToken Operand = OperandStack.pop_back_val(); + assert (Operand.first == IC_IMM && + "Unary operation with a register!"); + switch (Op.first) { + default: + report_fatal_error("Unexpected operator!"); + break; + case IC_NEG: + OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); + break; + case IC_NOT: + OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); + break; + } } else { assert (OperandStack.size() > 1 && "Too few operands."); int64_t Val; @@ -222,6 +248,12 @@ private: Val = Op1.second / Op2.second; OperandStack.push_back(std::make_pair(IC_IMM, Val)); break; + case IC_MOD: + assert (Op1.first == IC_IMM && Op2.first == IC_IMM && + "Modulo operation with an immediate and a register!"); + Val = Op1.second % Op2.second; + OperandStack.push_back(std::make_pair(IC_IMM, Val)); + break; case IC_OR: assert (Op1.first == IC_IMM && Op2.first == IC_IMM && "Or operation with an immediate and a register!"); @@ -271,6 +303,7 @@ private: IES_NOT, IES_MULTIPLY, IES_DIVIDE, + IES_MOD, IES_LBRAC, IES_RBRAC, IES_LPAREN, @@ -421,10 +454,16 @@ private: default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: case IES_NOT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: case IES_RPAREN: case IES_LBRAC: @@ -432,11 +471,12 @@ private: case IES_INTEGER: case IES_REGISTER: State = IES_MINUS; - // Only push the minus operator if it is not a unary operator. - if (!(CurrState == IES_PLUS || CurrState == IES_MINUS || - CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE || - CurrState == IES_LPAREN || CurrState == IES_LBRAC)) + // push minus operator if it is not a negate operator + if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || + CurrState == IES_INTEGER || CurrState == IES_RBRAC) IC.pushOperator(IC_MINUS); + else + IC.pushOperator(IC_NEG); if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { // If we already have a BaseReg, then assume this is the IndexReg with // a scale of 1. @@ -458,9 +498,21 @@ private: default: State = IES_ERROR; break; + case IES_OR: + case IES_XOR: + case IES_AND: + case IES_LSHIFT: + case IES_RSHIFT: case IES_PLUS: + case IES_MINUS: case IES_NOT: + case IES_MULTIPLY: + case IES_DIVIDE: + case IES_MOD: + case IES_LPAREN: + case IES_LBRAC: State = IES_NOT; + IC.pushOperator(IC_NOT); break; } PrevState = CurrState; @@ -525,6 +577,7 @@ private: case IES_LSHIFT: case IES_RSHIFT: case IES_DIVIDE: + case IES_MOD: case IES_MULTIPLY: case IES_LPAREN: State = IES_INTEGER; @@ -539,26 +592,6 @@ private: } // Get the scale and replace the 'Register * Scale' with '0'. IC.popOperator(); - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_MINUS) { - // Unary minus. No need to pop the minus operand because it was never - // pushed. - IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. - } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - CurrState == IES_NOT) { - // Unary not. No need to pop the not operand because it was never - // pushed. - IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. } else { IC.pushOperand(IC_IMM, TmpInt); } @@ -594,6 +627,19 @@ private: break; } } + void onMod() { + PrevState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_INTEGER: + case IES_RPAREN: + State = IES_MOD; + IC.pushOperator(IC_MOD); + break; + } + } void onLBrac() { PrevState = State; switch (State) { @@ -647,18 +693,8 @@ private: case IES_RSHIFT: case IES_MULTIPLY: case IES_DIVIDE: + case IES_MOD: case IES_LPAREN: - // FIXME: We don't handle this type of unary minus or not, yet. - if ((PrevState == IES_PLUS || PrevState == IES_MINUS || - PrevState == IES_OR || PrevState == IES_AND || - PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || - PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC || - PrevState == IES_NOT || PrevState == IES_XOR) && - (CurrState == IES_MINUS || CurrState == IES_NOT)) { - State = IES_ERROR; - break; - } State = IES_LPAREN; IC.pushOperator(IC_LPAREN); break; @@ -1302,6 +1338,8 @@ bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine SM.onXor(); else if (Name.equals_lower("and")) SM.onAnd(); + else if (Name.equals_lower("mod")) + SM.onMod(); else return false; return true; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index caf98bffb80d..8f2017e990c5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -396,7 +396,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (!SB->getFragment()) { Asm.getContext().reportError( Fixup.getLoc(), - "symbol '" + B->getSymbol().getName() + + "symbol '" + SB->getName() + "' can not be undefined in a subtraction expression"); return false; } @@ -408,7 +408,7 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // pedantic compatibility with 'as'. Type = A->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF : (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF; - Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); + Value2 = Writer->getSymbolAddress(*SB, Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } @@ -468,8 +468,8 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && - !is64Bit() && + const MCSymbolRefExpr *SymA = Target.getSymA(); + assert(SymA->getKind() == MCSymbolRefExpr::VK_TLVP && !is64Bit() && "Should only be called with a 32-bit TLVP relocation!"); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); @@ -480,15 +480,14 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, // subtraction from the picbase. For 32-bit pic the addend is the difference // between the picbase and the next address. For 32-bit static the addend is // zero. - if (Target.getSymB()) { + if (auto *SymB = Target.getSymB()) { // If this is a subtraction then we're pcrel. uint32_t FixupAddress = Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); IsPCRel = 1; - FixedValue = - FixupAddress - - Writer->getSymbolAddress(Target.getSymB()->getSymbol(), Layout) + - Target.getConstant(); + FixedValue = FixupAddress - + Writer->getSymbolAddress(SymB->getSymbol(), Layout) + + Target.getConstant(); FixedValue += 1ULL << Log2Size; } else { FixedValue = 0; @@ -499,8 +498,7 @@ void X86MachObjectWriter::recordTLVPRelocation(MachObjectWriter *Writer, MRE.r_word0 = Value; MRE.r_word1 = (IsPCRel << 24) | (Log2Size << 25) | (MachO::GENERIC_RELOC_TLV << 28); - Writer->addRelocation(&Target.getSymA()->getSymbol(), Fragment->getParent(), - MRE); + Writer->addRelocation(&SymA->getSymbol(), Fragment->getParent(), MRE); } void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 5892f1de33ee..807f7a6ddb19 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -44,7 +44,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, const MCAsmBackend &MAB) const { unsigned FixupKind = Fixup.getKind(); if (IsCrossSection) { - if (FixupKind != FK_Data_4) { + if (FixupKind != FK_Data_4 && FixupKind != llvm::X86::reloc_signed_4byte) { Ctx.reportError(Fixup.getLoc(), "Cannot represent this expression"); return COFF::IMAGE_REL_AMD64_ADDR32; } diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index fe105298f5c1..7437ebacfac3 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -300,6 +300,8 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", "Intel Atom processors">; def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", "Intel Silvermont processors">; +def ProcIntelGLM : SubtargetFeature<"glm", "X86ProcFamily", "IntelGLM", + "Intel Goldmont processors">; class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; @@ -430,6 +432,34 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. +class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [ + ProcIntelGLM, + FeatureX87, + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeaturePOPCNT, + FeaturePCLMUL, + FeatureAES, + FeaturePRFCHW, + FeatureCallRegIndirect, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowBTMem, + FeatureLAHFSAHF, + FeatureMPX, + FeatureSHA, + FeatureRDSEED, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureXSAVEC, + FeatureXSAVES, + FeatureCLFLUSHOPT +]>; +def : GoldmontProc<"goldmont">; + // "Arrandale" along with corei3 and corei5 class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ FeatureX87, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f777e5628988..b89914f8893e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5065,6 +5065,20 @@ static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +// Return true if the instruction zeroes the unused upper part of the +// destination and accepts mask. +static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { + switch (Opcode) { + default: + return false; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return true; + } +} + /// Insert i1-subvector to i1-vector. static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { @@ -5097,6 +5111,22 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, // 3. Subvector should be inserted in the middle (for example v2i1 // to v16i1, index 2) + // If this node widens - by concatenating zeroes - the type of the result + // of a node with instruction that zeroes all upper (irrelevant) bits of the + // output register, mark this node as legal to enable replacing them with + // the v8i1 version of the previous instruction during instruction selection. + // For example, VPCMPEQDZ128rr instruction stores its v4i1 result in a k-reg, + // while zeroing all the upper remaining 60 bits of the register. if the + // result of such instruction is inserted into an allZeroVector, then we can + // safely remove insert_vector (in instruction selection) as the cmp instr + // already zeroed the rest of the register. + if (ISD::isBuildVectorAllZeros(Vec.getNode()) && IdxVal == 0 && + (isMaskedZeroUpperBitsvXi1(SubVec.getOpcode()) || + (SubVec.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(SubVec.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(SubVec.getOperand(1).getOpcode()))))) + return Op; + // extend to natively supported kshift MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; MVT WideOpVT = OpVT; @@ -7919,6 +7949,60 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); } +// Return true if all the operands of the given CONCAT_VECTORS node are zeros +// except for the first one. (CONCAT_VECTORS Op, 0, 0,...,0) +static bool isExpandWithZeros(const SDValue &Op) { + assert(Op.getOpcode() == ISD::CONCAT_VECTORS && + "Expand with zeros only possible in CONCAT_VECTORS nodes!"); + + for (unsigned i = 1; i < Op.getNumOperands(); i++) + if (!ISD::isBuildVectorAllZeros(Op.getOperand(i).getNode())) + return false; + + return true; +} + +// Returns true if the given node is a type promotion (by concatenating i1 +// zeros) of the result of a node that already zeros all upper bits of +// k-register. +static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { + unsigned Opc = Op.getOpcode(); + + assert(Opc == ISD::CONCAT_VECTORS && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && + "Unexpected node to check for type promotion!"); + + // As long as we are concatenating zeros to the upper part of a previous node + // result, climb up the tree until a node with different opcode is + // encountered + while (Opc == ISD::INSERT_SUBVECTOR || Opc == ISD::CONCAT_VECTORS) { + if (Opc == ISD::INSERT_SUBVECTOR) { + if (ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()) && + Op.getConstantOperandVal(2) == 0) + Op = Op.getOperand(1); + else + return SDValue(); + } else { // Opc == ISD::CONCAT_VECTORS + if (isExpandWithZeros(Op)) + Op = Op.getOperand(0); + else + return SDValue(); + } + Opc = Op.getOpcode(); + } + + // Check if the first inserted node zeroes the upper bits, or an 'and' result + // of a node that zeros the upper bits (its masked version). + if (isMaskedZeroUpperBitsvXi1(Op.getOpcode()) || + (Op.getOpcode() == ISD::AND && + (isMaskedZeroUpperBitsvXi1(Op.getOperand(0).getOpcode()) || + isMaskedZeroUpperBitsvXi1(Op.getOperand(1).getOpcode())))) { + return Op; + } + + return SDValue(); +} + static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -7929,6 +8013,17 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + // If this node promotes - by concatenating zeroes - the type of the result + // of a node with instruction that zeroes all upper (irrelevant) bits of the + // output register, mark it as legal and catch the pattern in instruction + // selection to avoid emitting extra insturctions (for zeroing upper bits). + if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) { + SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64); + SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted, + ZeroC); + } + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { // Specialize the cases when all, or all but one, of the operands are undef. @@ -27012,6 +27107,9 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { unsigned NumMaskElts = Mask.size(); + unsigned InputSizeInBits = MaskVT.getSizeInBits(); + unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; + MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); bool ContainsZeros = false; APInt Zeroable(NumMaskElts, false); @@ -27027,7 +27125,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, - MaskVT.getScalarSizeInBits(), Mask, + MaskScalarSizeInBits, Mask, 0, Zeroable, Subtarget); if (0 < ShiftAmt) { PermuteImm = (unsigned)ShiftAmt; @@ -27043,10 +27141,6 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, return SM_SentinelUndef <= M && M < (int)NumMaskElts; }) && "Expected unary shuffle"); - unsigned InputSizeInBits = MaskVT.getSizeInBits(); - unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size(); - MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - // Handle PSHUFLW/PSHUFHW repeated patterns. if (MaskScalarSizeInBits == 16) { SmallVector<int, 4> RepeatedMask; @@ -35072,7 +35166,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, /// that is commonly recognized as an idiom (has no register dependency), so /// that's better/smaller than loading a splat 1 constant. static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { - assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB && + assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unexpected opcode for increment/decrement transform"); // Pseudo-legality check: getOnesVector() expects one of these types, so bail diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 01a70323224c..cc5c09cbf0e5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -185,6 +185,20 @@ def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info, def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info, v2f64x_info>; +class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm, + ValueType _vt> { + RegisterClass KRC = _krc; + RegisterClass KRCWM = _krcwm; + ValueType KVT = _vt; +} + +def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>; +def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>; +def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>; +def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>; +def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>; +def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>; + // This multiclass generates the masking variants from the non-masking // variant. It only provides the assembly pieces for the masking variants. // It assumes custom ISel patterns for masking which can be provided as @@ -1735,17 +1749,217 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; -let Predicates = [HasAVX512, NoVLX] in { -def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPGTDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (VPCMPEQDZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; -} +multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask, + _.RC:$src1, _.RC:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + +multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> + : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)))))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask, + _.RC:$src1, addr:$src2), + NewInf.KRC)>; +} +} + +// VPCMPEQB - i8 +defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQBZ256", [HasBWI, HasVLX]>; + +// VPCMPEQW - i16 +defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm, + "VPCMPEQWZ", [HasBWI]>; + +// VPCMPEQD - i32 +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm, + "VPCMPEQDZ", [HasAVX512]>; + +// VPCMPEQQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm, + "VPCMPEQQZ", [HasAVX512]>; + +// VPCMPGTB - i8 +defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTBZ256", [HasBWI, HasVLX]>; + +// VPCMPGTW - i16 +defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm, + "VPCMPGTWZ", [HasBWI]>; + +// VPCMPGTD - i32 +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm, + "VPCMPGTDZ", [HasAVX512]>; + +// VPCMPGTQ - i64 +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; +defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm, + "VPCMPGTQZ", [HasAVX512]>; multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -1908,6 +2122,237 @@ defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info, defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; +multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and _.KRCWM:$mask, + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask, + _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (_.VT (bitconvert + (_.LdFrag addr:$src2))), + imm:$cc)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + SDNode OpNode, string InstrStr, + list<Predicate> Preds> + : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (and (_.KVT _.KRCWM:$mask), + (_.KVT (OpNode (_.VT _.RC:$src1), + (X86VBroadcast + (_.ScalarLdFrag addr:$src2)), + imm:$cc)))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask, + _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +// VPCMPB - i8 +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm, + "VPCMPBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm, + "VPCMPBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm, + "VPCMPBZ256", [HasBWI, HasVLX]>; + +// VPCMPW - i16 +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm, + "VPCMPWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm, + "VPCMPWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm, + "VPCMPWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm, + "VPCMPWZ", [HasBWI]>; + +// VPCMPD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm, + "VPCMPDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm, + "VPCMPDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm, + "VPCMPDZ", [HasAVX512]>; + +// VPCMPQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm, + "VPCMPQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm, + "VPCMPQZ", [HasAVX512]>; + +// VPCMPUB - i8 +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu, + "VPCMPUBZ256", [HasBWI, HasVLX]>; + +// VPCMPUW - i16 +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ128", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; +defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu, + "VPCMPUWZ256", [HasBWI, HasVLX]>; + +defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu, + "VPCMPUWZ", [HasBWI]>; + +// VPCMPUD - i32 +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu, + "VPCMPUDZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu, + "VPCMPUDZ", [HasAVX512]>; + +// VPCMPUQ - i64 +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ128", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu, + "VPCMPUQZ256", [HasAVX512, HasVLX]>; + +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; +defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu, + "VPCMPUQZ", [HasAVX512]>; + multiclass avx512_vcmp_common<X86VectorVTInfo _> { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, @@ -1998,21 +2443,108 @@ defm VCMPPD : avx512_vcmp<avx512vl_f64_info>, defm VCMPPS : avx512_vcmp<avx512vl_f32_info>, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VCMPPSZrri - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; -def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (VPCMPUDZrri - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; +multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + string InstrStr, list<Predicate> Preds> { +let Predicates = Preds in { + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (_.VT (bitconvert (_.LdFrag addr:$src2))), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; + + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpm (_.VT _.RC:$src1), + (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + imm:$cc)), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1, + addr:$src2, + imm:$cc), + NewInf.KRC)>; +} +} + +multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf, + string InstrStr, list<Predicate> Preds> + : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> { + +let Predicates = Preds in + def : Pat<(insert_subvector (NewInf.KVT immAllZerosV), + (_.KVT (X86cmpmRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))), + (i64 0)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1, + _.RC:$src2, + imm:$cc), + NewInf.KRC)>; +} + + +// VCMPPS - f32 +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ", + [HasAVX512]>; + +// VCMPPD - f64 +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; +defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256", + [HasAVX512, HasVLX]>; + +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ", + [HasAVX512]>; +defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ", + [HasAVX512]>; // ---------------------------------------------------------------- // FPClass @@ -2498,6 +3030,69 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>; +multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> { +def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr) + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), + (i8 8)), (i8 8))>; +} + +multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr, + AVX512VLVectorVTInfo _> { +def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), VK8)>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri) + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; + +def : Pat<(insert_subvector (v16i1 immAllZerosV), + (v8i1 (and VK8:$mask, + (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))), + (i64 0)), + (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik) + (COPY_TO_REGCLASS VK8:$mask, VK16), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + imm:$cc), + (i8 8)), (i8 8))>; +} + +let Predicates = [HasAVX512, NoVLX] in { + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">; + defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">; + + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>; + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>; + defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>; +} + // Mask setting all 0s or 1s multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> { let Predicates = [HasAVX512] in diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index f98c2a7e802d..e34a90e975b8 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -75,6 +75,8 @@ private: bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, @@ -270,6 +272,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectUadde(I, MRI, MF)) return true; + if (selectMergeValues(I, MRI, MF)) + return true; if (selectExtract(I, MRI, MF)) return true; if (selectInsert(I, MRI, MF)) @@ -914,6 +918,55 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectMergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_MERGE_VALUES) + return false; + + // Split to inserts. + unsigned DstReg = I.getOperand(0).getReg(); + unsigned SrcReg0 = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg0); + unsigned SrcSize = SrcTy.getSizeInBits(); + + const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + // For the first src use insertSubReg. + unsigned DefReg = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(DefReg, RegBank); + if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) + return false; + + for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { + + unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); + MRI.setRegBank(Tmp, RegBank); + + MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_INSERT), Tmp) + .addReg(DefReg) + .addReg(I.getOperand(Idx).getReg()) + .addImm((Idx - 1) * SrcSize); + + DefReg = Tmp; + + if (!select(InsertInst)) + return false; + } + + MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::COPY), DstReg) + .addReg(DefReg); + + if (!select(CopyInst)) + return false; + + I.eraseFromParent(); + return true; +} InstructionSelector * llvm::createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &Subtarget, diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index a584eabcc1b2..a5fa3340c3f1 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -56,7 +56,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32}) setAction({BinOp, Ty}, Legal); @@ -117,7 +117,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) for (auto Ty : {s8, s16, s32, s64}) setAction({BinOp, Ty}, Legal); @@ -228,10 +228,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX() { for (auto Ty : {v8s32, v4s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoAVX2() { @@ -280,10 +284,14 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { for (auto Ty : {v16s32, v8s64}) setAction({MemOp, Ty}, Legal); - for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) + for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) { setAction({G_INSERT, Ty}, Legal); - for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) + setAction({G_EXTRACT, 1, Ty}, Legal); + } + for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) { setAction({G_INSERT, 1, Ty}, Legal); + setAction({G_EXTRACT, Ty}, Legal); + } /************ VLX *******************/ if (!Subtarget.hasVLX()) diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e36a47506ba0..24845beac22d 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -11,10 +11,23 @@ // //===----------------------------------------------------------------------===// +#include "X86.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "X86CallLowering.h" +#include "X86LegalizerInfo.h" +#include "X86RegisterBankInfo.h" +#endif #include "X86Subtarget.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/IR/Attributes.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" @@ -336,6 +349,35 @@ X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct X86GISelActualAccessor : public GISelAccessor { + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + std::unique_ptr<InstructionSelector> InstSelector; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, const X86TargetMachine &TM, unsigned StackAlignOverride) @@ -360,6 +402,19 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, setPICStyle(PICStyles::StubPIC); else if (isTargetELF()) setPICStyle(PICStyles::GOT); +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + X86GISelActualAccessor *GISel = new X86GISelActualAccessor(); + + GISel->CallLoweringInfo.reset(new X86CallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new X86LegalizerInfo(*this, TM)); + + auto *RBI = new X86RegisterBankInfo(*getRegisterInfo()); + GISel->RegBankInfo.reset(RBI); + GISel->InstSelector.reset(createX86InstructionSelector(TM, *this, *RBI)); +#endif + setGISelAccessor(*GISel); } const CallLowering *X86Subtarget::getCallLowering() const { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 550e95c39ab5..fa0afe29586b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -58,7 +58,7 @@ protected: }; enum X86ProcFamilyEnum { - Others, IntelAtom, IntelSLM + Others, IntelAtom, IntelSLM, IntelGLM }; /// X86 processor family: Intel Atom, and others diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index a9f42cacf788..8d891c983fab 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -15,9 +15,6 @@ #include "X86.h" #include "X86CallLowering.h" #include "X86LegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "X86RegisterBankInfo.h" -#endif #include "X86MacroFusion.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -31,7 +28,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDepsFix.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -212,35 +208,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, X86TargetMachine::~X86TargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct X86GISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - std::unique_ptr<InstructionSelector> InstSelector; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const X86Subtarget * X86TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -280,20 +247,6 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { resetTargetOptions(F); I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this, Options.StackAlignmentOverride); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - X86GISelActualAccessor *GISel = new X86GISelActualAccessor(); - - GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this)); - - auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); - GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector( - *this, *I, *RBI)); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 1bf267d34ec2..aaa6d58bd134 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,7 @@ public: ~X86TargetMachine() override; const X86Subtarget *getSubtargetImpl(const Function &F) const override; + const X86Subtarget *getSubtargetImpl() const = delete; TargetIRAnalysis getTargetIRAnalysis() override; |