diff options
Diffstat (limited to 'llvm/lib/Target')
43 files changed, 1141 insertions, 562 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b8953583a310..6da089d1859a 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1000,6 +1000,26 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; + case AArch64::HINT: { + // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for + // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be + // non-empty. If MI is the initial BTI, place the + // __patchable_function_entries label after BTI. + if (CurrentPatchableFunctionEntrySym && + CurrentPatchableFunctionEntrySym == CurrentFnBegin && + MI == &MF->front().front()) { + int64_t Imm = MI->getOperand(0).getImm(); + if ((Imm & 32) && (Imm & 6)) { + MCInst Inst; + MCInstLowering.Lower(MI, Inst); + EmitToStreamer(*OutStreamer, Inst); + CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); + OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); + return; + } + } + break; + } case AArch64::MOVMCSym: { Register DestReg = MI->getOperand(0).getReg(); const MachineOperand &MO_Sym = MI->getOperand(1); diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index bc91d628f0b4..cbca29b63b70 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -66,6 +66,10 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit", static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100), cl::Hidden); +// Enable register renaming to find additional store pairing opportunities. 
+static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming", + cl::init(false), cl::Hidden); + #define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass" namespace { @@ -1446,6 +1450,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); Optional<bool> MaybeCanRename = None; + if (!EnableRenaming) + MaybeCanRename = {false}; + SmallPtrSet<const TargetRegisterClass *, 5> RequiredClasses; LiveRegUnits UsedInBetween; UsedInBetween.init(*TRI); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 6f4569a49783..131219ca6944 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -183,7 +183,21 @@ public: bool &AllowPromotionWithoutCommonHeader); bool shouldExpandReduction(const IntrinsicInst *II) const { - return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + // We don't have legalization support for ordered FP reductions. + return !II->getFastMathFlags().allowReassoc(); + + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + // Lowering asserts that there are no NaNs. + return !II->getFastMathFlags().noNaNs(); + + default: + // Don't expand anything else, let legalization deal with it. 
+ return false; + } } unsigned getGISelRematGlobalCost() const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index fbed51de0ea4..a55a1747cafe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID; void initializeSILowerControlFlowPass(PassRegistry &); extern char &SILowerControlFlowID; -void initializeSIRemoveShortExecBranchesPass(PassRegistry &); -extern char &SIRemoveShortExecBranchesID; - void initializeSIInsertSkipsPass(PassRegistry &); extern char &SIInsertSkipsPassID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index eb30d659bf0b..c8dc6f6e3bf4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIModeRegisterPass(*PR); initializeSIWholeQuadModePass(*PR); initializeSILowerControlFlowPass(*PR); - initializeSIRemoveShortExecBranchesPass(*PR); initializeSIInsertSkipsPass(*PR); initializeSIMemoryLegalizerPass(*PR); initializeSIOptimizeExecMaskingPass(*PR); @@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() { // be better for it to emit S_NOP <N> when possible. 
addPass(&PostRAHazardRecognizerID); - addPass(&SIRemoveShortExecBranchesID); addPass(&SIInsertSkipsPassID); addPass(&BranchRelaxationPassID); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 191f603a66d6..01bb60f07f2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -117,24 +118,58 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA, return true; } +static void removeDoneExport(Function &F) { + ConstantInt *BoolFalse = ConstantInt::getFalse(F.getContext()); + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + if (IntrinsicInst *Intrin = llvm::dyn_cast<IntrinsicInst>(&I)) { + if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp) { + Intrin->setArgOperand(6, BoolFalse); // done + } else if (Intrin->getIntrinsicID() == Intrinsic::amdgcn_exp_compr) { + Intrin->setArgOperand(4, BoolFalse); // done + } + } + } + } +} + static BasicBlock *unifyReturnBlockSet(Function &F, ArrayRef<BasicBlock *> ReturningBlocks, + bool InsertExport, const TargetTransformInfo &TTI, StringRef Name) { // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F); + IRBuilder<> B(NewRetBlock); + + if (InsertExport) { + // Ensure that there's only one "done" export in the shader by removing the + // "done" bit set on the original final export. More than one "done" export + // can lead to undefined behavior. 
+ removeDoneExport(F); + + Value *Undef = UndefValue::get(B.getFloatTy()); + B.CreateIntrinsic(Intrinsic::amdgcn_exp, { B.getFloatTy() }, + { + B.getInt32(9), // target, SQ_EXP_NULL + B.getInt32(0), // enabled channels + Undef, Undef, Undef, Undef, // values + B.getTrue(), // done + B.getTrue(), // valid mask + }); + } PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + B.CreateRetVoid(); } else { // If the function doesn't return void... add a PHI node to the block... - PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), - "UnifiedRetVal"); - NewRetBlock->getInstList().push_back(PN); - ReturnInst::Create(F.getContext(), PN, NewRetBlock); + PN = B.CreatePHI(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + assert(!InsertExport); + B.CreateRet(PN); } // Loop over all of the blocks, replacing the return instruction with an @@ -173,6 +208,8 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { // Dummy return block for infinite loop. BasicBlock *DummyReturnBB = nullptr; + bool InsertExport = false; + for (BasicBlock *BB : PDT.getRoots()) { if (isa<ReturnInst>(BB->getTerminator())) { if (!isUniformlyReached(DA, *BB)) @@ -188,6 +225,36 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { "DummyReturnBlock", &F); Type *RetTy = F.getReturnType(); Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy); + + // For pixel shaders, the producer guarantees that an export is + // executed before each return instruction. However, if there is an + // infinite loop and we insert a return ourselves, we need to uphold + // that guarantee by inserting a null export. This can happen e.g. in + // an infinite loop with kill instructions, which is supposed to + // terminate. However, we don't need to do this if there is a non-void + // return value, since then there is an epilog afterwards which will + // still export. 
+ // + // Note: In the case where only some threads enter the infinite loop, + // this can result in the null export happening redundantly after the + // original exports. However, The last "real" export happens after all + // the threads that didn't enter an infinite loop converged, which + // means that the only extra threads to execute the null export are + // threads that entered the infinite loop, and they only could've + // exited through being killed which sets their exec bit to 0. + // Therefore, unless there's an actual infinite loop, which can have + // invalid results, or there's a kill after the last export, which we + // assume the frontend won't do, this export will have the same exec + // mask as the last "real" export, and therefore the valid mask will be + // overwritten with the same value and will still be correct. Also, + // even though this forces an extra unnecessary export wait, we assume + // that this happens rare enough in practice to that we don't have to + // worry about performance. 
+ if (F.getCallingConv() == CallingConv::AMDGPU_PS && + RetTy->isVoidTy()) { + InsertExport = true; + } + ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); ReturningBlocks.push_back(DummyReturnBB); } @@ -260,6 +327,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock"); + unifyReturnBlockSet(F, ReturningBlocks, InsertExport, TTI, "UnifiedReturnBlock"); return true; } diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td index 1a526675164a..e2978624811d 100644 --- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td +++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td @@ -50,6 +50,8 @@ def COS_cm : COS_Common<0x8E>; def : RsqPat<RECIPSQRT_IEEE_cm, f32>; +def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>; + def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>; defm DIV_cm : DIV_Common<RECIP_IEEE_cm>; @@ -70,8 +72,6 @@ def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { -def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; - class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> : CF_MEM_RAT_CACHELESS <0x14, 0, mask, (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr), diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index 792e26d21f98..88e554ae0bcc 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -118,11 +118,12 @@ def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def : RsqPat<RECIPSQRT_IEEE_eg, f32>; +def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>; + def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>; -def : EGPat<(fsqrt f32:$src), 
(MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; } // End SubtargetPredicate = isEG //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index cbdf0de44f87..869c183e2245 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -1233,6 +1233,11 @@ def : R600Pat< def : RcpPat<recip_ieee, f32>; } +class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat < + (fsqrt f32:$src), + (RecipInst (RsqInst $src)) +>; + //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1272,8 +1277,8 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; - def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat<RECIPSQRT_IEEE_r600, f32>; + def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>; def R600_ExportSwz : ExportSwzInst { let Word1{20-17} = 0; // BURST_COUNT diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 80c044ec00cb..87e63fcc4a04 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "si-insert-skips" static cl::opt<unsigned> SkipThresholdFlag( - "amdgpu-skip-threshold-legacy", + "amdgpu-skip-threshold", cl::desc("Number of instructions before jumping over divergent control flow"), cl::init(12), cl::Hidden); @@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; switch (MI.getOpcode()) { - case AMDGPU::S_CBRANCH_EXECZ: - ExecBranchStack.push_back(MI.getOperand(0).getMBB()); - break; case AMDGPU::SI_MASK_BRANCH: 
ExecBranchStack.push_back(MI.getOperand(0).getMBB()); MadeChange |= skipMaskBranch(MI, MBB); diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 61d2719a3aad..bf052dc3c930 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec) .addReg(Tmp, RegState::Kill); - // Insert the S_CBRANCH_EXECZ instruction which will be optimized later - // during SIRemoveShortExecBranches. - MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) + // Insert a pseudo terminator to help keep the verifier happy. This will also + // be used later when inserting skips. + MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) .add(MI.getOperand(2)); if (!LIS) { @@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { .addReg(DstReg); MachineInstr *Branch = - BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addMBB(DestBB); + BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH)) + .addMBB(DestBB); if (!LIS) { MI.eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp deleted file mode 100644 index 51779e97ac62..000000000000 --- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===-- SIRemoveShortExecBranches.cpp ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass optmizes the s_cbranch_execz instructions. 
-/// The pass removes this skip instruction for short branches, -/// if there is no unwanted sideeffect in the fallthrough code sequence. -/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -#define DEBUG_TYPE "si-remove-short-exec-branches" - -static unsigned SkipThreshold; - -static cl::opt<unsigned, true> SkipThresholdFlag( - "amdgpu-skip-threshold", cl::Hidden, - cl::desc( - "Number of instructions before jumping over divergent control flow"), - cl::location(SkipThreshold), cl::init(12)); - -namespace { - -class SIRemoveShortExecBranches : public MachineFunctionPass { -private: - const SIInstrInfo *TII = nullptr; - bool getBlockDestinations(MachineBasicBlock &SrcMBB, - MachineBasicBlock *&TrueMBB, - MachineBasicBlock *&FalseMBB, - SmallVectorImpl<MachineOperand> &Cond); - bool mustRetainExeczBranch(const MachineBasicBlock &From, - const MachineBasicBlock &To) const; - bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); - -public: - static char ID; - - SIRemoveShortExecBranches() : MachineFunctionPass(ID) { - initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; -}; - -} // End anonymous namespace. 
- -INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE, - "SI remove short exec branches", false, false) - -char SIRemoveShortExecBranches::ID = 0; - -char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID; - -bool SIRemoveShortExecBranches::getBlockDestinations( - MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB, - MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) { - if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond)) - return false; - - if (!FalseMBB) - FalseMBB = SrcMBB.getNextNode(); - - return true; -} - -bool SIRemoveShortExecBranches::mustRetainExeczBranch( - const MachineBasicBlock &From, const MachineBasicBlock &To) const { - unsigned NumInstr = 0; - const MachineFunction *MF = From.getParent(); - - for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end(); - MBBI != End && MBBI != ToI; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - // When a uniform loop is inside non-uniform control flow, the branch - // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken - // when EXEC = 0. We should skip the loop lest it becomes infinite. - if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ || - I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) - return true; - - if (TII->hasUnwantedEffectsWhenEXECEmpty(*I)) - return true; - - // These instructions are potentially expensive even if EXEC = 0. - if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) || - I->getOpcode() == AMDGPU::S_WAITCNT) - return true; - - ++NumInstr; - if (NumInstr >= SkipThreshold) - return true; - } - } - - return false; -} - -// Returns true if the skip branch instruction is removed. 
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI, - MachineBasicBlock &SrcMBB) { - MachineBasicBlock *TrueMBB = nullptr; - MachineBasicBlock *FalseMBB = nullptr; - SmallVector<MachineOperand, 1> Cond; - - if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond)) - return false; - - // Consider only the forward branches. - if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) || - mustRetainExeczBranch(*FalseMBB, *TrueMBB)) - return false; - - LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI); - MI.eraseFromParent(); - SrcMBB.removeSuccessor(TrueMBB); - - return true; -} - -bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) { - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - TII = ST.getInstrInfo(); - MF.RenumberBlocks(); - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); - if (MBBI == MBB.end()) - continue; - - MachineInstr &MI = *MBBI; - switch (MI.getOpcode()) { - case AMDGPU::S_CBRANCH_EXECZ: - Changed = removeExeczBranch(MI, MBB); - break; - default: - break; - } - } - - return Changed; -} diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5271bc3aacc6..8b21b9346987 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -559,7 +559,7 @@ bool isReadOnlySegment(const GlobalValue *GV) { } bool shouldEmitConstantsToTextSection(const Triple &TT) { - return TT.getOS() == Triple::AMDPAL; + return TT.getOS() == Triple::AMDPAL || TT.getArch() == Triple::r600; } int getIntegerAttribute(const Function &F, StringRef Name, int Default) { diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 634fb89b8e89..66ad120a111f 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -330,8 +330,8 @@ void 
ARMConstantIslands::verify() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// print block size and offset information - debugging LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { - BBInfoVector &BBInfo = BBUtils->getBBInfo(); LLVM_DEBUG({ + BBInfoVector &BBInfo = BBUtils->getBBInfo(); for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2c3ac816219f..de4377ec5a47 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1952,24 +1952,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::LOADDUAL: - case ARM::STOREDUAL: { - Register PairReg = MI.getOperand(0).getReg(); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) - .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), - Opcode == ARM::LOADDUAL ? RegState::Define : 0) - .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), - Opcode == ARM::LOADDUAL ? 
RegState::Define : 0); - for (unsigned i = 1; i < MI.getNumOperands(); i++) - MIB.add(MI.getOperand(i)); - MIB.add(predOps(ARMCC::AL)); - MIB.cloneMemRefs(MI); - MI.eraseFromParent(); - return true; - } } } diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 76a9ac12062d..9b06987178d8 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -145,8 +145,6 @@ public: // Thumb 2 Addressing Modes: bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); - template <unsigned Shift> - bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, @@ -1296,33 +1294,6 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, return true; } -template <unsigned Shift> -bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, - SDValue &OffImm) { - if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { - int RHSC; - if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } - - if (N.getOpcode() == ISD::SUB) - RHSC = -RHSC; - OffImm = - CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); - return true; - } - } - - // Base only. - Base = N; - OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R - imm8 operands. @@ -3515,26 +3486,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } - case ARMISD::LDRD: { - if (Subtarget->isThumb2()) - break; // TableGen handles isel in this case. 
- SDValue Base, RegOffset, ImmOffset; - const SDValue &Chain = N->getOperand(0); - const SDValue &Addr = N->getOperand(1); - SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); - SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; - SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, - {MVT::Untyped, MVT::Other}, Ops); - SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, - SDValue(New, 0)); - SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, - SDValue(New, 0)); - ReplaceUses(SDValue(N, 0), Lo); - ReplaceUses(SDValue(N, 1), Hi); - ReplaceUses(SDValue(N, 2), SDValue(New, 1)); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::LOOP_DEC: { SDValue Ops[] = { N->getOperand(1), N->getOperand(2), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index cf738cd66434..1e6f7d889201 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1073,8 +1073,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRA, MVT::i64, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); - setOperationAction(ISD::LOAD, MVT::i64, Custom); - setOperationAction(ISD::STORE, MVT::i64, Custom); // MVE lowers 64 bit shifts to lsll and lsrl // assuming that ISD::SRL and SRA of i64 are already marked custom @@ -1598,9 +1596,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; - case ARMISD::LDRD: return "ARMISD::LDRD"; - case ARMISD::STRD: return "ARMISD::STRD"; - case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; @@ -9088,24 +9083,6 @@ static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues({Pred, Load.getValue(1)}, dl); } -void ARMTargetLowering::LowerLOAD(SDNode *N, 
SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { - LoadSDNode *LD = cast<LoadSDNode>(N); - EVT MemVT = LD->getMemoryVT(); - assert(LD->isUnindexed() && "Loads should be unindexed at this point."); - - if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() && - !Subtarget->isThumb1Only() && LD->isVolatile()) { - SDLoc dl(N); - SDValue Result = DAG.getMemIntrinsicNode( - ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}), - {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand()); - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, - Result.getValue(0), Result.getValue(1)); - Results.append({Pair, Result.getValue(2)}); - } -} - static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); EVT MemVT = ST->getMemoryVT(); @@ -9135,34 +9112,6 @@ static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) { ST->getMemOperand()); } -static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - EVT MemVT = ST->getMemoryVT(); - assert(ST->isUnindexed() && "Stores should be unindexed at this point."); - - if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() && - !Subtarget->isThumb1Only() && ST->isVolatile()) { - SDNode *N = Op.getNode(); - SDLoc dl(N); - - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), - DAG.getTargetConstant(0, dl, MVT::i32)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(), - DAG.getTargetConstant(1, dl, MVT::i32)); - - return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other), - {ST->getChain(), Lo, Hi, ST->getBasePtr()}, - MemVT, ST->getMemOperand()); - } else if (Subtarget->hasMVEIntegerOps() && - ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 || - MemVT == MVT::v16i1))) { - return LowerPredicateStore(Op, DAG); - } - - return SDValue(); -} - static bool isZeroVector(SDValue N) { return 
(ISD::isBuildVectorAllZeros(N.getNode()) || (N->getOpcode() == ARMISD::VMOVIMM && @@ -9350,7 +9299,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::LOAD: return LowerPredicateLoad(Op, DAG); case ISD::STORE: - return LowerSTORE(Op, DAG, Subtarget); + return LowerPredicateStore(Op, DAG); case ISD::MLOAD: return LowerMLOAD(Op, DAG); case ISD::ATOMIC_LOAD: @@ -9452,9 +9401,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ABS: lowerABS(N, Results, DAG); return ; - case ISD::LOAD: - LowerLOAD(N, Results, DAG); - break; + } if (Res.getNode()) Results.push_back(Res); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 1baa22a4fa56..cc74e5d875d8 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -278,11 +278,7 @@ class VectorType; VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // Load/Store of dual registers - LDRD, - STRD + VST4LN_UPD }; } // end namespace ARMISD @@ -735,8 +731,6 @@ class VectorType; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; - void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; Register getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const override; diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index ce67af6f1b49..3efe85a7d45c 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -243,12 +243,6 @@ def ARMqsub8b : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>; def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>; def ARMqsub16b : SDNode<"ARMISD::QSUB16b", SDT_ARMAnd, []>; -def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, 
SDNPMemOperand]>; - -def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>; -def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - // Vector operations shared between NEON and MVE def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; @@ -2701,14 +2695,6 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { Requires<[IsARM, HasV5TE]>; } -let mayLoad = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { -def LOADDUAL : ARMPseudoInst<(outs GPRPairOp:$Rt), (ins addrmode3:$addr), - 64, IIC_iLoad_d_r, []>, - Requires<[IsARM, HasV5TE]> { - let AM = AddrMode3; -} -} - def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "lda", "\t$Rt, $addr", []>; def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -2984,19 +2970,6 @@ let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { } } -let mayStore = 1, hasSideEffects = 0, hasNoSchedulingInfo = 1 in { -def STOREDUAL : ARMPseudoInst<(outs), (ins GPRPairOp:$Rt, addrmode3:$addr), - 64, IIC_iStore_d_r, []>, - Requires<[IsARM, HasV5TE]> { - let AM = AddrMode3; -} -} - -let Predicates = [IsARM, HasV5TE] in { -def : Pat<(ARMstrd GPR:$Rt, GPR:$Rt2, addrmode3:$addr), - (STOREDUAL (REG_SEQUENCE GPRPair, GPR:$Rt, gsub_0, GPR:$Rt2, gsub_1), addrmode3:$addr)>; -} - // Indexed stores multiclass AI2_stridx<bit isByte, string opc, InstrItinClass iii, InstrItinClass iir> { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 4193e8147f47..c5aae235f25d 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -270,8 +270,7 @@ def t2am_imm8_offset : MemOperand, // t2addrmode_imm8s4 := reg +/- (imm8 << 2) def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";} -class T2AddrMode_Imm8s4 : MemOperand, - ComplexPattern<i32, 2, "SelectT2AddrModeImm8<2>", []> { 
+class T2AddrMode_Imm8s4 : MemOperand { let EncoderMethod = "getT2AddrModeImm8s4OpValue"; let DecoderMethod = "DecodeT2AddrModeImm8s4"; let ParserMatchClass = MemImm8s4OffsetAsmOperand; @@ -1449,8 +1448,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", - [(set rGPR:$Rt, rGPR:$Rt2, (ARMldrd t2addrmode_imm8s4:$addr))]>, + IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>, Sched<[WriteLd]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1631,8 +1629,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), - IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", - [(ARMstrd rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr)]>, + IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>, Sched<[WriteST]>; // Indexed stores diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 880588adfdfd..f66083eaf187 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -171,7 +171,26 @@ public: TTI::ReductionFlags Flags) const; bool shouldExpandReduction(const IntrinsicInst *II) const { - return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + // We don't have legalization support for ordered FP reductions. + if (!II->getFastMathFlags().allowReassoc()) + return true; + // Can't legalize reductions with soft floats. 
+ return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs(); + + case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::experimental_vector_reduce_fmax: + // Can't legalize reductions with soft floats, and NoNan will create + // fminimum which we do not know how to lower. + return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() || + !II->getFastMathFlags().noNaNs(); + + default: + // Don't expand anything else, let legalization deal with it. + return false; + } } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h index b81bf4e1320d..cbae4675cb14 100644 --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -110,6 +110,19 @@ private: return true; } + // Prevent reducing load width during SelectionDag phase. + // Otherwise, we may transform the following + // ctx = ctx + reloc_offset + // ... (*(u32 *)ctx) & 0x8000... + // to + // ctx = ctx + reloc_offset + // ... (*(u8 *)(ctx + 1)) & 0x80 ... + // which will be rejected by the verifier. + bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, + EVT NewVT) const override { + return false; + } + unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg, bool isSigned) const; diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index 5310f0f07b65..29abc9303a62 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -70,9 +70,10 @@ private: public: // Main entry point for this pass. 
bool runOnMachineFunction(MachineFunction &MF) override { - if (!skipFunction(MF.getFunction())) { - initialize(MF); - } + if (skipFunction(MF.getFunction())) + return false; + + initialize(MF); return removeLD(); } }; diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 53562f42a184..c7efdf42a7c6 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -195,12 +195,13 @@ public: Parser.addAliasForDirective(".dword", ".8byte"); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - if (Options.ABIName.back() == 'f' && + auto ABIName = StringRef(Options.ABIName); + if (ABIName.endswith("f") && !getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) { errs() << "Hard-float 'f' ABI can't be used for a target that " "doesn't support the F instruction set extension (ignoring " "target-abi)\n"; - } else if (Options.ABIName.back() == 'd' && + } else if (ABIName.endswith("d") && !getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) { errs() << "Hard-float 'd' ABI can't be used for a target that " "doesn't support the D instruction set extension (ignoring " diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 82afa13aece3..770e883221d1 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -92,10 +92,13 @@ include "RISCVSystemOperands.td" // Registers, calling conventions, instruction descriptions. //===----------------------------------------------------------------------===// +include "RISCVSchedule.td" include "RISCVRegisterInfo.td" include "RISCVCallingConv.td" include "RISCVInstrInfo.td" include "RISCVRegisterBanks.td" +include "RISCVSchedRocket32.td" +include "RISCVSchedRocket64.td" //===----------------------------------------------------------------------===// // RISC-V processors supported. 
@@ -106,6 +109,12 @@ def : ProcessorModel<"generic-rv32", NoSchedModel, [FeatureRVCHints]>; def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit, FeatureRVCHints]>; +def : ProcessorModel<"rocket-rv32", Rocket32Model, [FeatureRVCHints]>; + +def : ProcessorModel<"rocket-rv64", Rocket64Model, [Feature64Bit, + FeatureRVCHints]>; + + //===----------------------------------------------------------------------===// // Define the RISC-V target. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 7229ebfe1db0..3ed10cca5377 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -103,7 +103,8 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr, // Pseudo instructions class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = ""> - : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> { + : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo>, + Sched<[]> { let isPseudo = 1; let isCodeGenOnly = 1; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 8e9ad4965583..81f1abe8337e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -298,7 +298,8 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class BranchCC_rri<bits<3> funct3, string opcodestr> : RVInstB<funct3, OPC_BRANCH, (outs), (ins GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12), - opcodestr, "$rs1, $rs2, $imm12"> { + opcodestr, "$rs1, $rs2, $imm12">, + Sched<[WriteJmp]> { let isBranch = 1; let isTerminator = 1; } @@ -320,13 +321,15 @@ class Store_rri<bits<3> funct3, string opcodestr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_ri<bits<3> funct3, string opcodestr> : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - 
opcodestr, "$rd, $rs1, $imm12">; + opcodestr, "$rd, $rs1, $imm12">, + Sched<[WriteIALU, ReadIALU]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class Shift_ri<bit arithshift, bits<3> funct3, string opcodestr> : RVInstIShift<arithshift, funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, uimmlog2xlen:$shamt), opcodestr, - "$rd, $rs1, $shamt">; + "$rd, $rs1, $shamt">, + Sched<[WriteShift, ReadShift]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr> @@ -336,19 +339,20 @@ class ALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr> let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ir<bits<3> funct3, string opcodestr> : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, GPR:$rs1), - opcodestr, "$rd, $imm12, $rs1">; + opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR, ReadCSR]>; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in class CSR_ii<bits<3> funct3, string opcodestr> : RVInstI<funct3, OPC_SYSTEM, (outs GPR:$rd), (ins csr_sysreg:$imm12, uimm5:$rs1), - opcodestr, "$rd, $imm12, $rs1">; + opcodestr, "$rd, $imm12, $rs1">, Sched<[WriteCSR]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ShiftW_ri<bit arithshift, bits<3> funct3, string opcodestr> : RVInstIShiftW<arithshift, funct3, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1, uimm5:$shamt), opcodestr, - "$rd, $rs1, $shamt">; + "$rd, $rs1, $shamt">, + Sched<[WriteShift32, ReadShift32]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class ALUW_rr<bits<7> funct7, bits<3> funct3, string opcodestr> @@ -367,19 +371,20 @@ class Priv<string opcodestr, bits<7> funct7> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { let isReMaterializable = 1, isAsCheapAsAMove = 1 in def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20), - "lui", "$rd, $imm20">; + "lui", "$rd, $imm20">, Sched<[WriteIALU]>; def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20), - "auipc", "$rd, 
$imm20">; + "auipc", "$rd, $imm20">, Sched<[WriteIALU]>; let isCall = 1 in def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20), - "jal", "$rd, $imm20">; + "jal", "$rd, $imm20">, Sched<[WriteJal]>; let isCall = 1 in def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - "jalr", "$rd, ${imm12}(${rs1})">; + "jalr", "$rd, ${imm12}(${rs1})">, + Sched<[WriteJalr, ReadJalr]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 def BEQ : BranchCC_rri<0b000, "beq">; @@ -389,15 +394,15 @@ def BGE : BranchCC_rri<0b101, "bge">; def BLTU : BranchCC_rri<0b110, "bltu">; def BGEU : BranchCC_rri<0b111, "bgeu">; -def LB : Load_ri<0b000, "lb">; -def LH : Load_ri<0b001, "lh">; -def LW : Load_ri<0b010, "lw">; -def LBU : Load_ri<0b100, "lbu">; -def LHU : Load_ri<0b101, "lhu">; +def LB : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>; +def LH : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>; +def LW : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>; +def LBU : Load_ri<0b100, "lbu">, Sched<[WriteLDB, ReadMemBase]>; +def LHU : Load_ri<0b101, "lhu">, Sched<[WriteLDH, ReadMemBase]>; -def SB : Store_rri<0b000, "sb">; -def SH : Store_rri<0b001, "sh">; -def SW : Store_rri<0b010, "sw">; +def SB : Store_rri<0b000, "sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>; +def SH : Store_rri<0b001, "sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>; +def SW : Store_rri<0b010, "sw">, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>; // ADDI isn't always rematerializable, but isReMaterializable will be used as // a hint which is verified in isReallyTriviallyReMaterializable. 
@@ -418,21 +423,21 @@ def SLLI : Shift_ri<0, 0b001, "slli">; def SRLI : Shift_ri<0, 0b101, "srli">; def SRAI : Shift_ri<1, 0b101, "srai">; -def ADD : ALU_rr<0b0000000, 0b000, "add">; -def SUB : ALU_rr<0b0100000, 0b000, "sub">; -def SLL : ALU_rr<0b0000000, 0b001, "sll">; -def SLT : ALU_rr<0b0000000, 0b010, "slt">; -def SLTU : ALU_rr<0b0000000, 0b011, "sltu">; -def XOR : ALU_rr<0b0000000, 0b100, "xor">; -def SRL : ALU_rr<0b0000000, 0b101, "srl">; -def SRA : ALU_rr<0b0100000, 0b101, "sra">; -def OR : ALU_rr<0b0000000, 0b110, "or">; -def AND : ALU_rr<0b0000000, 0b111, "and">; +def ADD : ALU_rr<0b0000000, 0b000, "add">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SUB : ALU_rr<0b0100000, 0b000, "sub">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLL : ALU_rr<0b0000000, 0b001, "sll">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLT : ALU_rr<0b0000000, 0b010, "slt">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SLTU : ALU_rr<0b0000000, 0b011, "sltu">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def XOR : ALU_rr<0b0000000, 0b100, "xor">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SRL : ALU_rr<0b0000000, 0b101, "srl">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def SRA : ALU_rr<0b0100000, 0b101, "sra">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def OR : ALU_rr<0b0000000, 0b110, "or">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def AND : ALU_rr<0b0000000, 0b111, "and">, Sched<[WriteIALU, ReadIALU, ReadIALU]>; let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins fencearg:$pred, fencearg:$succ), - "fence", "$pred, $succ"> { + "fence", "$pred, $succ">, Sched<[]> { bits<4> pred; bits<4> succ; @@ -441,25 +446,26 @@ def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), let imm12 = {0b0000,pred,succ}; } -def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", ""> { +def FENCE_TSO : RVInstI<0b000, OPC_MISC_MEM, (outs), (ins), "fence.tso", "">, Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = {0b1000,0b0011,0b0011}; } 
-def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> { +def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", "">, Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 0; } -def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> { +def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", "">, Sched<[WriteJmp]> { let rs1 = 0; let rd = 0; let imm12 = 0; } -def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { +def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", "">, + Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 1; @@ -468,7 +474,8 @@ def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { // This is a de facto standard (as set by GNU binutils) 32-bit unimplemented // instruction (i.e., it should always trap, if your implementation has invalid // instruction traps). -def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", ""> { +def UNIMP : RVInstI<0b001, OPC_SYSTEM, (outs), (ins), "unimp", "">, + Sched<[]> { let rs1 = 0; let rd = 0; let imm12 = 0b110000000000; @@ -486,24 +493,30 @@ def CSRRCI : CSR_ii<0b111, "csrrci">; /// RV64I instructions let Predicates = [IsRV64] in { -def LWU : Load_ri<0b110, "lwu">; -def LD : Load_ri<0b011, "ld">; -def SD : Store_rri<0b011, "sd">; +def LWU : Load_ri<0b110, "lwu">, Sched<[WriteLDWU, ReadMemBase]>; +def LD : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>; +def SD : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - "addiw", "$rd, $rs1, $imm12">; + "addiw", "$rd, $rs1, $imm12">, + Sched<[WriteIALU32, ReadIALU32]>; def SLLIW : ShiftW_ri<0, 0b001, "slliw">; def SRLIW : ShiftW_ri<0, 0b101, "srliw">; def SRAIW : ShiftW_ri<1, 0b101, "sraiw">; -def ADDW : ALUW_rr<0b0000000, 0b000, "addw">; -def SUBW : ALUW_rr<0b0100000, 0b000, "subw">; -def SLLW : 
ALUW_rr<0b0000000, 0b001, "sllw">; -def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">; -def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; +def ADDW : ALUW_rr<0b0000000, 0b000, "addw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SUBW : ALUW_rr<0b0100000, 0b000, "subw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SLLW : ALUW_rr<0b0000000, 0b001, "sllw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SRLW : ALUW_rr<0b0000000, 0b101, "srlw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; } // Predicates = [IsRV64] //===----------------------------------------------------------------------===// @@ -511,26 +524,26 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">; //===----------------------------------------------------------------------===// let isBarrier = 1, isReturn = 1, isTerminator = 1 in { -def URET : Priv<"uret", 0b0000000> { +def URET : Priv<"uret", 0b0000000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } -def SRET : Priv<"sret", 0b0001000> { +def SRET : Priv<"sret", 0b0001000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } -def MRET : Priv<"mret", 0b0011000> { +def MRET : Priv<"mret", 0b0011000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00010; } } // isBarrier = 1, isReturn = 1, isTerminator = 1 -def WFI : Priv<"wfi", 0b0001000> { +def WFI : Priv<"wfi", 0b0001000>, Sched<[]> { let rd = 0; let rs1 = 0; let rs2 = 0b00101; @@ -539,7 +552,7 @@ def WFI : Priv<"wfi", 0b0001000> { let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in def SFENCE_VMA : RVInstR<0b0001001, 0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1, GPR:$rs2), - "sfence.vma", "$rs1, $rs2"> { + "sfence.vma", "$rs1, $rs2">, Sched<[]> { let rd = 0; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 7321f4bd9d2f..de73c8df9367 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -77,31 +77,51 @@ multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy> { //===----------------------------------------------------------------------===// let Predicates = [HasStdExtA] in { -defm LR_W : LR_r_aq_rl<0b010, "lr.w">; -defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">; -defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">; -defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">; -defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">; -defm AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">; -defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">; -defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">; -defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">; -defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">; -defm AMOMAXU_W : AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">; +defm LR_W : LR_r_aq_rl<0b010, "lr.w">, Sched<[WriteAtomicLDW, ReadAtomicLDW]>; +defm SC_W : AMO_rr_aq_rl<0b00011, 0b010, "sc.w">, + Sched<[WriteAtomicSTW, ReadAtomicSTW, ReadAtomicSTW]>; +defm AMOSWAP_W : AMO_rr_aq_rl<0b00001, 0b010, "amoswap.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOADD_W : AMO_rr_aq_rl<0b00000, 0b010, "amoadd.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOXOR_W : AMO_rr_aq_rl<0b00100, 0b010, "amoxor.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOAND_W : AMO_rr_aq_rl<0b01100, 0b010, "amoand.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOOR_W : AMO_rr_aq_rl<0b01000, 0b010, "amoor.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMIN_W : AMO_rr_aq_rl<0b10000, 0b010, "amomin.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMAX_W : AMO_rr_aq_rl<0b10100, 0b010, "amomax.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMINU_W : AMO_rr_aq_rl<0b11000, 0b010, "amominu.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; +defm AMOMAXU_W : 
AMO_rr_aq_rl<0b11100, 0b010, "amomaxu.w">, + Sched<[WriteAtomicW, ReadAtomicWA, ReadAtomicWD]>; } // Predicates = [HasStdExtA] let Predicates = [HasStdExtA, IsRV64] in { -defm LR_D : LR_r_aq_rl<0b011, "lr.d">; -defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">; -defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">; -defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">; -defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">; -defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">; -defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">; -defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">; -defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">; -defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">; -defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">; +defm LR_D : LR_r_aq_rl<0b011, "lr.d">, Sched<[WriteAtomicLDD, ReadAtomicLDD]>; +defm SC_D : AMO_rr_aq_rl<0b00011, 0b011, "sc.d">, + Sched<[WriteAtomicSTD, ReadAtomicSTD, ReadAtomicSTD]>; +defm AMOSWAP_D : AMO_rr_aq_rl<0b00001, 0b011, "amoswap.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOADD_D : AMO_rr_aq_rl<0b00000, 0b011, "amoadd.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOXOR_D : AMO_rr_aq_rl<0b00100, 0b011, "amoxor.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOAND_D : AMO_rr_aq_rl<0b01100, 0b011, "amoand.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOOR_D : AMO_rr_aq_rl<0b01000, 0b011, "amoor.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; +defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">, + Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>; } 
// Predicates = [HasStdExtA, IsRV64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index fa0050f107b2..f68767847ade 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -282,7 +282,8 @@ let Predicates = [HasStdExtC] in { let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), (ins SP:$rs1, uimm10_lsb00nonzero:$imm), - "c.addi4spn", "$rd, $rs1, $imm"> { + "c.addi4spn", "$rd, $rs1, $imm">, + Sched<[WriteIALU, ReadIALU]> { bits<5> rs1; let Inst{12-11} = imm{5-4}; let Inst{10-7} = imm{9-6}; @@ -291,13 +292,15 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000> { +def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, + Sched<[WriteFLD64, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { +def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>, + Sched<[WriteLDW, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -306,7 +309,8 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00> { let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> { +def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, + Sched<[WriteFLD32, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -314,20 +318,23 @@ def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00> { } let Predicates = [HasStdExtC, IsRV64] in -def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000> { +def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, + Sched<[WriteLDD, ReadMemBase]> { bits<8> imm; let 
Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000> { +def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } -def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { +def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -336,7 +343,8 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00> { let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> { +def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]> { bits<7> imm; let Inst{12-10} = imm{5-3}; let Inst{6} = imm{2}; @@ -344,14 +352,16 @@ def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00> { } let Predicates = [HasStdExtC, IsRV64] in -def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000> { +def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>, + Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { bits<8> imm; let Inst{12-10} = imm{5-3}; let Inst{6-5} = imm{7-6}; } let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> +def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">, + Sched<[WriteNop]> { let Inst{6-2} = 0; } @@ -359,7 +369,8 @@ def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", ""> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, simm6nonzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } @@ -367,7 +378,8 @@ def 
C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI_NOP : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let isAsmParserOnly = 1; @@ -377,27 +389,30 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, DecoderNamespace = "RISCV32Only_", Defs = [X1], Predicates = [HasStdExtC, IsRV32] in def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset), - "c.jal", "$offset">; + "c.jal", "$offset">, Sched<[WriteJal]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Predicates = [HasStdExtC, IsRV64] in def C_ADDIW : RVInst16CI<0b001, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, simm6:$imm), - "c.addiw", "$rd, $imm"> { + "c.addiw", "$rd, $imm">, + Sched<[WriteIALU32, ReadIALU32]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_LI : RVInst16CI<0b010, 0b01, (outs GPRNoX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm"> { + "c.li", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), (ins SP:$rd, simm10_lsb0000nonzero:$imm), - "c.addi16sp", "$rd, $imm"> { + "c.addi16sp", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{12} = imm{9}; let Inst{11-7} = 2; @@ -410,78 +425,93 @@ def C_ADDI16SP : RVInst16CI<0b011, 0b01, (outs SP:$rd_wb), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_LUI : RVInst16CI<0b011, 0b01, (outs GPRNoX0X2:$rd), (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm"> { + "c.lui", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; } -def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>; -def C_SRAI : Shift_right<0b01, "c.srai", GPRC, 
uimmlog2xlennonzero>; +def C_SRLI : Shift_right<0b00, "c.srli", GPRC, uimmlog2xlennonzero>, + Sched<[WriteShift, ReadShift]>; +def C_SRAI : Shift_right<0b01, "c.srai", GPRC, uimmlog2xlennonzero>, + Sched<[WriteShift, ReadShift]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:$imm), - "c.andi", "$rs1, $imm"> { + "c.andi", "$rs1, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; let Inst{12} = imm{5}; let Inst{11-10} = 0b10; let Inst{6-2} = imm{4-0}; } -def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>; -def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>; -def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>; -def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>; +def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; +def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>, + Sched<[WriteIALU, ReadIALU, ReadIALU]>; let Predicates = [HasStdExtC, IsRV64] in { -def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>; -def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>; +def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; +def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>, + Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset), - "c.j", "$offset"> { + "c.j", "$offset">, Sched<[WriteJmp]> { let isBranch = 1; let isTerminator=1; let isBarrier=1; } -def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>; -def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>; +def C_BEQZ : Bcz<0b110, "c.beqz", seteq, GPRC>, Sched<[WriteJmp]>; +def C_BNEZ : Bcz<0b111, "c.bnez", setne, GPRC>, Sched<[WriteJmp]>; let 
hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli" ,"$rd, $imm"> { + "c.slli" ,"$rd, $imm">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000> { +def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, + Sched<[WriteFLD64, ReadMemBase]> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } -def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00> { +def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>, + Sched<[WriteLDW, ReadMemBase]> { let Inst{6-4} = imm{4-2}; let Inst{3-2} = imm{7-6}; } let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00> { +def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, + Sched<[WriteFLD32, ReadMemBase]> { let Inst{6-4} = imm{4-2}; let Inst{3-2} = imm{7-6}; } let Predicates = [HasStdExtC, IsRV64] in -def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000> { +def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, + Sched<[WriteLDD, ReadMemBase]> { let Inst{6-5} = imm{4-3}; let Inst{4-2} = imm{8-6}; } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), - "c.jr", "$rs1"> { + "c.jr", "$rs1">, Sched<[WriteJmpReg]> { let isBranch = 1; let isBarrier = 1; let isTerminator = 1; @@ -491,43 +521,49 @@ def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1), let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_MV : RVInst16CR<0b1000, 0b10, (outs GPRNoX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2">; + "c.mv", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU]>; let rs1 = 0, rs2 = 0, hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -def C_EBREAK : 
RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">; +def C_EBREAK : RVInst16CR<0b1001, 0b10, (outs), (ins), "c.ebreak", "">, Sched<[]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall=1, Defs=[X1], rs2 = 0 in def C_JALR : RVInst16CR<0b1001, 0b10, (outs), (ins GPRNoX0:$rs1), - "c.jalr", "$rs1">; + "c.jalr", "$rs1">, Sched<[WriteJalr, ReadJalr]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPRNoX0:$rs1_wb), (ins GPRNoX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, $rs2"> { + "c.add", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; } let Predicates = [HasStdExtC, HasStdExtD] in -def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000> { +def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } -def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00> { +def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>, + Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { let Inst{12-9} = imm{5-2}; let Inst{8-7} = imm{7-6}; } let DecoderNamespace = "RISCV32Only_", Predicates = [HasStdExtC, HasStdExtF, IsRV32] in -def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00> { +def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]> { let Inst{12-9} = imm{5-2}; let Inst{8-7} = imm{7-6}; } let Predicates = [HasStdExtC, IsRV64] in -def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { +def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>, + Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { let Inst{12-10} = imm{5-3}; let Inst{9-7} = imm{8-6}; } @@ -535,7 +571,8 @@ def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000> { // The all zeros pattern isn't a valid RISC-V instruction. 
It's used by GNU // binutils as 16-bit instruction known to be unimplemented (i.e., trapping). let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in -def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther> { +def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>, + Sched<[]> { let Inst{15-0} = 0; } @@ -551,7 +588,7 @@ let Predicates = [HasStdExtC, HasRVCHints], hasSideEffects = 0, mayLoad = 0, let rd = 0 in def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), - "c.nop", "$imm"> { + "c.nop", "$imm">, Sched<[WriteNop]> { let Inst{6-2} = imm{4-0}; let DecoderMethod = "decodeRVCInstrSImm"; } @@ -559,7 +596,8 @@ def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm), // Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6. def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, simm6nonzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; let isAsmParserOnly = 1; @@ -567,14 +605,16 @@ def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb), def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb), (ins GPRNoX0:$rd, immzero:$imm), - "c.addi", "$rd, $imm"> { + "c.addi", "$rd, $imm">, + Sched<[WriteIALU, ReadIALU]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let isAsmParserOnly = 1; } def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), - "c.li", "$rd, $imm"> { + "c.li", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdSImm"; @@ -582,14 +622,15 @@ def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm), def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd), (ins c_lui_imm:$imm), - "c.lui", "$rd, $imm"> { + "c.lui", "$rd, $imm">, + Sched<[WriteIALU]> { let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; 
let DecoderMethod = "decodeRVCInstrRdSImm"; } def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), - "c.mv", "$rs1, $rs2"> + "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> { let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdRs2"; @@ -597,7 +638,8 @@ def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2), def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), (ins GPRX0:$rs1, GPRNoX0:$rs2), - "c.add", "$rs1, $rs2"> { + "c.add", "$rs1, $rs2">, + Sched<[WriteIALU, ReadIALU, ReadIALU]> { let Constraints = "$rs1 = $rs1_wb"; let Inst{11-7} = 0; let DecoderMethod = "decodeRVCInstrRdRs1Rs2"; @@ -605,7 +647,8 @@ def C_ADD_HINT : RVInst16CR<0b1001, 0b10, (outs GPRX0:$rs1_wb), def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), (ins GPRX0:$rd, uimmlog2xlennonzero:$imm), - "c.slli" ,"$rd, $imm"> { + "c.slli" ,"$rd, $imm">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = imm{4-0}; let Inst{11-7} = 0; @@ -613,7 +656,8 @@ def C_SLLI_HINT : RVInst16CI<0b000, 0b10, (outs GPRX0:$rd_wb), } def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), - "c.slli64" ,"$rd"> { + "c.slli64" ,"$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{12} = 0; @@ -621,7 +665,8 @@ def C_SLLI64_HINT : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), (ins GPR:$rd), def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), (ins GPRC:$rd), - "c.srli64", "$rd"> { + "c.srli64", "$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{11-10} = 0; @@ -630,7 +675,8 @@ def C_SRLI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb), (ins GPRC:$rd), - "c.srai64", "$rd"> { + "c.srai64", "$rd">, + Sched<[WriteShift, ReadShift]> { let Constraints = "$rd = $rd_wb"; let Inst{6-2} = 0; let Inst{11-10} = 1; diff --git 
a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index b5343e8a8309..4a036eb52bb8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -42,13 +42,15 @@ class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr> : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd), - (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUD_rr_frm<bits<7> funct7, string opcodestr> : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd), (ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr, - "$rd, $rs1, $rs2, $funct3">; + "$rd, $rs1, $rs2, $funct3">, + Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr> : InstAlias<OpcodeStr#" $rd, $rs1, $rs2", @@ -57,7 +59,8 @@ class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPCmpD_rr<bits<3> funct3, string opcodestr> : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>; //===----------------------------------------------------------------------===// // Instructions @@ -68,7 +71,8 @@ let Predicates = [HasStdExtD] in { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), (ins GPR:$rs1, simm12:$imm12), - "fld", "$rd, ${imm12}(${rs1})">; + "fld", "$rd, ${imm12}(${rs1})">, + Sched<[WriteFLD64, ReadMemBase]>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields 
are specified in the instruction @@ -76,15 +80,20 @@ def FLD : RVInstI<0b011, OPC_LOAD_FP, (outs FPR64:$rd), let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in def FSD : RVInstS<0b011, OPC_STORE_FP, (outs), (ins FPR64:$rs2, GPR:$rs1, simm12:$imm12), - "fsd", "$rs2, ${imm12}(${rs1})">; + "fsd", "$rs2, ${imm12}(${rs1})">, + Sched<[WriteFST64, ReadStoreData, ReadMemBase]>; -def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">; +def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">, + Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>; def : FPFMADDynFrmAlias<FMADD_D, "fmadd.d">; -def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">; +def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">, + Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>; def : FPFMADDynFrmAlias<FMSUB_D, "fmsub.d">; -def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">; +def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">, + Sched<[WriteFMulSub64, ReadFMulSub64, ReadFMulSub64, ReadFMulSub64]>; def : FPFMADDynFrmAlias<FNMSUB_D, "fnmsub.d">; -def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">; +def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">, + Sched<[WriteFMulAdd64, ReadFMulAdd64, ReadFMulAdd64, ReadFMulAdd64]>; def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">; def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">; @@ -96,7 +105,8 @@ def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">; def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">; def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">; -def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d"> { +def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">, + Sched<[WriteFSqrt64, ReadFSqrt64]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>; @@ -107,12 +117,14 @@ def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">; def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">; def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">; -def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d"> { +def
FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">, + Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>; -def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s"> { +def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">, + Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> { let rs2 = 0b00000; } @@ -120,55 +132,66 @@ def FEQ_D : FPCmpD_rr<0b010, "feq.d">; def FLT_D : FPCmpD_rr<0b001, "flt.d">; def FLE_D : FPCmpD_rr<0b000, "fle.d">; -def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d"> { +def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">, + Sched<[WriteFClass64, ReadFClass64]> { let rs2 = 0b00000; } -def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d"> { +def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>; -def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d"> { +def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>; -def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w"> { +def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { let rs2 = 0b00000; } -def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu"> { +def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { let rs2 = 0b00001; } } // Predicates = [HasStdExtD] let Predicates = [HasStdExtD, IsRV64] in { -def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d"> { +def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { let 
rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>; -def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d"> { +def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>; -def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d"> { +def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">, + Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> { let rs2 = 0b00000; } -def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l"> { +def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>; -def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu"> { +def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>; -def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x"> { +def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">, + Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> { let rs2 = 0b00000; } } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 3b73c865ea17..782c3f65af14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -60,7 +60,8 @@ class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr> let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr> : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd), - (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFALU32, 
ReadFALU32, ReadFALU32]>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPALUS_rr_frm<bits<7> funct7, string opcodestr> @@ -93,7 +94,8 @@ class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPCmpS_rr<bits<3> funct3, string opcodestr> : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">; + (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">, + Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>; //===----------------------------------------------------------------------===// // Instructions @@ -103,7 +105,8 @@ let Predicates = [HasStdExtF] in { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), (ins GPR:$rs1, simm12:$imm12), - "flw", "$rd, ${imm12}(${rs1})">; + "flw", "$rd, ${imm12}(${rs1})">, + Sched<[WriteFLD32, ReadMemBase]>; // Operands for stores are in the order srcreg, base, offset rather than // reflecting the order these fields are specified in the instruction @@ -111,27 +114,37 @@ def FLW : RVInstI<0b010, OPC_LOAD_FP, (outs FPR32:$rd), let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in def FSW : RVInstS<0b010, OPC_STORE_FP, (outs), (ins FPR32:$rs2, GPR:$rs1, simm12:$imm12), - "fsw", "$rs2, ${imm12}(${rs1})">; + "fsw", "$rs2, ${imm12}(${rs1})">, + Sched<[WriteFST32, ReadStoreData, ReadMemBase]>; -def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">; +def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">, + Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>; def : FPFMASDynFrmAlias<FMADD_S, "fmadd.s">; -def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">; +def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">, + Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>; def : FPFMASDynFrmAlias<FMSUB_S, "fmsub.s">; -def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">; +def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">, + 
Sched<[WriteFMulSub32, ReadFMulSub32, ReadFMulSub32, ReadFMulSub32]>; def : FPFMASDynFrmAlias<FNMSUB_S, "fnmsub.s">; -def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">; +def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">, + Sched<[WriteFMulAdd32, ReadFMulAdd32, ReadFMulAdd32, ReadFMulAdd32]>; def : FPFMASDynFrmAlias<FNMADD_S, "fnmadd.s">; -def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">; +def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">, + Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; def : FPALUSDynFrmAlias<FADD_S, "fadd.s">; -def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">; +def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">, + Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; def : FPALUSDynFrmAlias<FSUB_S, "fsub.s">; -def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">; +def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">, + Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>; def : FPALUSDynFrmAlias<FMUL_S, "fmul.s">; -def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">; +def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">, + Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>; def : FPALUSDynFrmAlias<FDIV_S, "fdiv.s">; -def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s"> { +def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">, + Sched<[WriteFSqrt32, ReadFSqrt32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>; @@ -142,17 +155,20 @@ def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">; def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">; def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">; -def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s"> { +def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>; -def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s"> { +def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">, + Sched<[WriteFCvtF32ToI32, 
ReadFCvtF32ToI32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>; -def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w"> { +def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">, + Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> { let rs2 = 0b00000; } @@ -160,42 +176,50 @@ def FEQ_S : FPCmpS_rr<0b010, "feq.s">; def FLT_S : FPCmpS_rr<0b001, "flt.s">; def FLE_S : FPCmpS_rr<0b000, "fle.s">; -def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s"> { +def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">, + Sched<[WriteFClass32, ReadFClass32]> { let rs2 = 0b00000; } -def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w"> { +def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> { let rs2 = 0b00000; } def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>; -def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu"> { +def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> { let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>; -def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x"> { +def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">, + Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> { let rs2 = 0b00000; } } // Predicates = [HasStdExtF] let Predicates = [HasStdExtF, IsRV64] in { -def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s"> { +def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>; -def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s"> { +def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { let rs2 = 0b00011; } def : 
FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>; -def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l"> { +def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { let rs2 = 0b00010; } def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>; -def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu"> { +def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { let rs2 = 0b00011; } def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index e75151ba99c7..987534aadd79 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -24,22 +24,35 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>; //===----------------------------------------------------------------------===// let Predicates = [HasStdExtM] in { -def MUL : ALU_rr<0b0000001, 0b000, "mul">; -def MULH : ALU_rr<0b0000001, 0b001, "mulh">; -def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">; -def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">; -def DIV : ALU_rr<0b0000001, 0b100, "div">; -def DIVU : ALU_rr<0b0000001, 0b101, "divu">; -def REM : ALU_rr<0b0000001, 0b110, "rem">; -def REMU : ALU_rr<0b0000001, 0b111, "remu">; +def MUL : ALU_rr<0b0000001, 0b000, "mul">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULH : ALU_rr<0b0000001, 0b001, "mulh">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def MULHU : ALU_rr<0b0000001, 0b011, "mulhu">, + Sched<[WriteIMul, ReadIMul, ReadIMul]>; +def DIV : ALU_rr<0b0000001, 0b100, "div">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; +def DIVU : ALU_rr<0b0000001, 0b101, "divu">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; +def REM : ALU_rr<0b0000001, 0b110, "rem">, + Sched<[WriteIDiv, ReadIDiv, 
ReadIDiv]>; +def REMU : ALU_rr<0b0000001, 0b111, "remu">, + Sched<[WriteIDiv, ReadIDiv, ReadIDiv]>; } // Predicates = [HasStdExtM] let Predicates = [HasStdExtM, IsRV64] in { -def MULW : ALUW_rr<0b0000001, 0b000, "mulw">; -def DIVW : ALUW_rr<0b0000001, 0b100, "divw">; -def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">; -def REMW : ALUW_rr<0b0000001, 0b110, "remw">; -def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">; +def MULW : ALUW_rr<0b0000001, 0b000, "mulw">, + Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>; +def DIVW : ALUW_rr<0b0000001, 0b100, "divw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def DIVUW : ALUW_rr<0b0000001, 0b101, "divuw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def REMW : ALUW_rr<0b0000001, 0b110, "remw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; +def REMUW : ALUW_rr<0b0000001, 0b111, "remuw">, + Sched<[WriteIDiv32, ReadIDiv32, ReadIDiv32]>; } // Predicates = [HasStdExtM, IsRV64] //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket32.td b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td new file mode 100644 index 000000000000..8a91a70b61c7 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket32.td @@ -0,0 +1,213 @@ +//==- RISCVSchedRocket32.td - Rocket Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Rocket machine model for scheduling and other instruction cost heuristics. 
+def Rocket32Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. + let IssueWidth = 1; // 1 micro-op is dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// Rocket is in-order. + +let BufferSize = 0 in { +def Rocket32UnitALU : ProcResource<1>; // Int ALU +def Rocket32UnitIMul : ProcResource<1>; // Int Multiply +def Rocket32UnitMem : ProcResource<1>; // Load/Store +def Rocket32UnitB : ProcResource<1>; // Branch + +def Rocket32UnitFPALU : ProcResource<1>; // FP ALU +} + +let BufferSize = 1 in { +def Rocket32UnitIDiv : ProcResource<1>; // Int Division +def Rocket32UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency.
+ +let SchedModel = Rocket32Model in { + +def : WriteRes<WriteJmp, [Rocket32UnitB]>; +def : WriteRes<WriteJal, [Rocket32UnitB]>; +def : WriteRes<WriteJalr, [Rocket32UnitB]>; +def : WriteRes<WriteJmpReg, [Rocket32UnitB]>; + +def : WriteRes<WriteIALU, [Rocket32UnitALU]>; +def : WriteRes<WriteShift, [Rocket32UnitALU]>; + +// Multiplies on Rocket differ by implementation; placeholder until +// we can determine how to read from command line +def : WriteRes<WriteIMul, [Rocket32UnitIMul]> { let Latency = 4; } + +// 32-bit divides have worse case latency of 34 cycle +def : WriteRes<WriteIDiv, [Rocket32UnitIDiv]> { + let Latency = 34; + let ResourceCycles = [34]; +} + +// Memory +def : WriteRes<WriteSTB, [Rocket32UnitMem]>; +def : WriteRes<WriteSTH, [Rocket32UnitMem]>; +def : WriteRes<WriteSTW, [Rocket32UnitMem]>; +def : WriteRes<WriteFST32, [Rocket32UnitMem]>; +def : WriteRes<WriteFST64, [Rocket32UnitMem]>; + +let Latency = 3 in { +def : WriteRes<WriteLDB, [Rocket32UnitMem]>; +def : WriteRes<WriteLDH, [Rocket32UnitMem]>; +def : WriteRes<WriteCSR, [Rocket32UnitALU]>; +} + +let Latency = 2 in { +def : WriteRes<WriteLDW, [Rocket32UnitMem]>; +def : WriteRes<WriteFLD32, [Rocket32UnitMem]>; +def : WriteRes<WriteFLD64, [Rocket32UnitMem]>; + +def : WriteRes<WriteAtomicW, [Rocket32UnitMem]>; +def : WriteRes<WriteAtomicLDW, [Rocket32UnitMem]>; +} + +def : WriteRes<WriteAtomicSTW, [Rocket32UnitMem]>; + +// Most FP single precision operations are 4 cycles +def : WriteRes<WriteFALU32, [Rocket32UnitFPALU]> { let Latency = 4; } + +// Most FP double precision operations are 6 cycles +def : WriteRes<WriteFALU64, [Rocket32UnitFPALU]> { let Latency = 6; } + +let Latency = 2 in { +def : WriteRes<WriteFCvtI32ToF32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCvtI32ToF64, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCvtF32ToI32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCvtF64ToI32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCvtF32ToF64, [Rocket32UnitFPALU]>; +def : 
WriteRes<WriteFCvtF64ToF32, [Rocket32UnitFPALU]>; + +def : WriteRes<WriteFClass32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFClass64, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCmp32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFCmp64, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMovF32ToI32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMovI32ToF32, [Rocket32UnitFPALU]>; +} + +let Latency = 5 in { +def : WriteRes<WriteFMul32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMulAdd32, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMulSub32, [Rocket32UnitFPALU]>; +} + +let Latency = 7 in { +def : WriteRes<WriteFMul64, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMulAdd64, [Rocket32UnitFPALU]>; +def : WriteRes<WriteFMulSub64, [Rocket32UnitFPALU]>; +} + +// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency +let Latency = 20, ResourceCycles = [20] in { +def : WriteRes<WriteFDiv32, [Rocket32UnitFPDivSqrt]>; +def : WriteRes<WriteFDiv64, [Rocket32UnitFPDivSqrt]>; +} + +// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency +def : WriteRes<WriteFSqrt32, [Rocket32UnitFPDivSqrt]> { let Latency = 20; + let ResourceCycles = [20];} +def : WriteRes<WriteFSqrt64, [Rocket32UnitFPDivSqrt]> { let Latency = 25; + let ResourceCycles = [25];} + +def : WriteRes<WriteNop, []>; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +let Unsupported = 1 in { +def : WriteRes<WriteIALU32, []>; +def : WriteRes<WriteShift32, []>; +def : WriteRes<WriteIMul32, []>; +def : WriteRes<WriteIDiv32, []>; +def : WriteRes<WriteSTD, []>; +def : WriteRes<WriteLDWU, []>; +def : WriteRes<WriteLDD, []>; +def : WriteRes<WriteAtomicD, []>; +def : WriteRes<WriteAtomicLDD, []>; +def : WriteRes<WriteAtomicSTD, []>; +def : WriteRes<WriteFCvtI64ToF32, []>; +def : WriteRes<WriteFCvtI64ToF64, []>; +def : WriteRes<WriteFCvtF64ToI64, []>; +def : WriteRes<WriteFCvtF32ToI64, []>; +def : WriteRes<WriteFMovI64ToF64, []>; +def : WriteRes<WriteFMovF64ToI64, []>; +} + 
+//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types with cycles. +// Dummy definitions for RocketCore. +def : ReadAdvance<ReadJmp, 0>; +def : ReadAdvance<ReadJalr, 0>; +def : ReadAdvance<ReadCSR, 0>; +def : ReadAdvance<ReadStoreData, 0>; +def : ReadAdvance<ReadMemBase, 0>; +def : ReadAdvance<ReadIALU, 0>; +def : ReadAdvance<ReadIALU32, 0>; +def : ReadAdvance<ReadShift, 0>; +def : ReadAdvance<ReadShift32, 0>; +def : ReadAdvance<ReadIDiv, 0>; +def : ReadAdvance<ReadIDiv32, 0>; +def : ReadAdvance<ReadIMul, 0>; +def : ReadAdvance<ReadIMul32, 0>; +def : ReadAdvance<ReadAtomicWA, 0>; +def : ReadAdvance<ReadAtomicWD, 0>; +def : ReadAdvance<ReadAtomicDA, 0>; +def : ReadAdvance<ReadAtomicDD, 0>; +def : ReadAdvance<ReadAtomicLDW, 0>; +def : ReadAdvance<ReadAtomicLDD, 0>; +def : ReadAdvance<ReadAtomicSTW, 0>; +def : ReadAdvance<ReadAtomicSTD, 0>; +def : ReadAdvance<ReadFALU32, 0>; +def : ReadAdvance<ReadFALU64, 0>; +def : ReadAdvance<ReadFMul32, 0>; +def : ReadAdvance<ReadFMulAdd32, 0>; +def : ReadAdvance<ReadFMulSub32, 0>; +def : ReadAdvance<ReadFMul64, 0>; +def : ReadAdvance<ReadFMulAdd64, 0>; +def : ReadAdvance<ReadFMulSub64, 0>; +def : ReadAdvance<ReadFDiv32, 0>; +def : ReadAdvance<ReadFDiv64, 0>; +def : ReadAdvance<ReadFSqrt32, 0>; +def : ReadAdvance<ReadFSqrt64, 0>; +def : ReadAdvance<ReadFCmp32, 0>; +def : ReadAdvance<ReadFCmp64, 0>; +def : ReadAdvance<ReadFCvtF32ToI32, 0>; +def : ReadAdvance<ReadFCvtF32ToI64, 0>; +def : ReadAdvance<ReadFCvtF64ToI32, 0>; +def : ReadAdvance<ReadFCvtF64ToI64, 0>; +def : ReadAdvance<ReadFCvtI32ToF32, 0>; +def : ReadAdvance<ReadFCvtI32ToF64, 0>; +def : ReadAdvance<ReadFCvtI64ToF32, 0>; +def : ReadAdvance<ReadFCvtI64ToF64, 0>; +def : ReadAdvance<ReadFCvtF32ToF64, 0>; +def : ReadAdvance<ReadFCvtF64ToF32, 0>; +def : ReadAdvance<ReadFMovF32ToI32, 0>; +def : ReadAdvance<ReadFMovI32ToF32, 0>; +def : ReadAdvance<ReadFMovF64ToI64, 0>; +def : ReadAdvance<ReadFMovI64ToF64, 0>; +def 
: ReadAdvance<ReadFClass32, 0>; +def : ReadAdvance<ReadFClass64, 0>; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket64.td b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td new file mode 100644 index 000000000000..79e79f90f2f0 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedRocket64.td @@ -0,0 +1,214 @@ +//==- RISCVSchedRocket64.td - Rocket Scheduling Definitions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. See MCSchedule.h for details. + +// Rocket machine model for scheduling and other instruction cost heuristics. +def Rocket64Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since Rocket is in-order. + let IssueWidth = 1; // 1 micro-ops are dispatched per cycle. + let LoadLatency = 3; + let MispredictPenalty = 3; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using the BufferSize = 0 since +// Rocket is in-order. 
+ +let BufferSize = 0 in { +def Rocket64UnitALU : ProcResource<1>; // Int ALU +def Rocket64UnitIMul : ProcResource<1>; // Int Multiply +def Rocket64UnitMem : ProcResource<1>; // Load/Store +def Rocket64UnitB : ProcResource<1>; // Branch + +def Rocket64UnitFPALU : ProcResource<1>; // FP ALU +} + +let BufferSize = 1 in { +def Rocket64UnitIDiv : ProcResource<1>; // Int Division +def Rocket64UnitFPDivSqrt : ProcResource<1>; // FP Divide/Sqrt +} + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = Rocket64Model in { + +def : WriteRes<WriteJmp, [Rocket64UnitB]>; +def : WriteRes<WriteJal, [Rocket64UnitB]>; +def : WriteRes<WriteJalr, [Rocket64UnitB]>; +def : WriteRes<WriteJmpReg, [Rocket64UnitB]>; + +def : WriteRes<WriteIALU32, [Rocket64UnitALU]>; +def : WriteRes<WriteIALU, [Rocket64UnitALU]>; +def : WriteRes<WriteShift32, [Rocket64UnitALU]>; +def : WriteRes<WriteShift, [Rocket64UnitALU]>; + +let Latency = 4 in { +def : WriteRes<WriteIMul, [Rocket64UnitIMul]>; +def : WriteRes<WriteIMul32, [Rocket64UnitIMul]>; +} + +// Integer divide varies based on operand magnitude and sign; worse case latency is 34. 
+def : WriteRes<WriteIDiv32, [Rocket64UnitIDiv]> { + let Latency = 34; + let ResourceCycles = [34]; +} +def : WriteRes<WriteIDiv, [Rocket64UnitIDiv]> { + let Latency = 33; + let ResourceCycles = [33]; +} + +// Memory +def : WriteRes<WriteSTB, [Rocket64UnitMem]>; +def : WriteRes<WriteSTH, [Rocket64UnitMem]>; +def : WriteRes<WriteSTW, [Rocket64UnitMem]>; +def : WriteRes<WriteSTD, [Rocket64UnitMem]>; +def : WriteRes<WriteFST32, [Rocket64UnitMem]>; +def : WriteRes<WriteFST64, [Rocket64UnitMem]>; + +let Latency = 3 in { +def : WriteRes<WriteLDB, [Rocket64UnitMem]>; +def : WriteRes<WriteLDH, [Rocket64UnitMem]>; +def : WriteRes<WriteCSR, [Rocket64UnitALU]>; +} + +let Latency = 2 in { +def : WriteRes<WriteLDW, [Rocket64UnitMem]>; +def : WriteRes<WriteLDWU, [Rocket64UnitMem]>; +def : WriteRes<WriteLDD, [Rocket64UnitMem]>; +def : WriteRes<WriteFLD32, [Rocket64UnitMem]>; +def : WriteRes<WriteFLD64, [Rocket64UnitMem]>; + +def : WriteRes<WriteAtomicW, [Rocket64UnitMem]>; +def : WriteRes<WriteAtomicD, [Rocket64UnitMem]>; + +def : WriteRes<WriteAtomicLDW, [Rocket64UnitMem]>; +def : WriteRes<WriteAtomicLDD, [Rocket64UnitMem]>; +} + +def : WriteRes<WriteAtomicSTW, [Rocket64UnitMem]>; +def : WriteRes<WriteAtomicSTD, [Rocket64UnitMem]>; + +// Most FP single precision operations are 4 cycles +def : WriteRes<WriteFALU32, [Rocket64UnitFPALU]> { let Latency = 4; } + +// Most FP double precision operations are 6 cycles +def : WriteRes<WriteFALU64, [Rocket64UnitFPALU]> { let Latency = 6; } + +// Conversion instructions +let Latency = 2 in { +def : WriteRes<WriteFCvtI32ToF32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtI32ToF64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtI64ToF32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtI64ToF64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtF32ToI32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtF32ToI64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtF64ToI32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtF64ToI64, [Rocket64UnitFPALU]>; 
+def : WriteRes<WriteFCvtF32ToF64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCvtF64ToF32, [Rocket64UnitFPALU]>; + +def : WriteRes<WriteFClass32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFClass64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCmp32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFCmp64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMovF32ToI32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMovI32ToF32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMovF64ToI64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMovI64ToF64, [Rocket64UnitFPALU]>; +} + +let Latency = 5 in { +def : WriteRes<WriteFMul32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMulAdd32, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMulSub32, [Rocket64UnitFPALU]>; +} + +let Latency = 7 in { +def : WriteRes<WriteFMul64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMulAdd64, [Rocket64UnitFPALU]>; +def : WriteRes<WriteFMulSub64, [Rocket64UnitFPALU]>; +} + +// FP Divide unit on Rocket is not pipelined, so set resource cycles to latency +let Latency = 20, ResourceCycles = [20] in { +def : WriteRes<WriteFDiv32, [Rocket64UnitFPDivSqrt]>; +def : WriteRes<WriteFDiv64, [Rocket64UnitFPDivSqrt]>; +} + +// FP Sqrt unit on Rocket is not pipelined, so set resource cycles to latency +def : WriteRes<WriteFSqrt32, [Rocket64UnitFPDivSqrt]> { let Latency = 20; + let ResourceCycles = [20]; } +def : WriteRes<WriteFSqrt64, [Rocket64UnitFPDivSqrt]> { let Latency = 25; + let ResourceCycles = [25]; } + +def : WriteRes<WriteNop, []>; + +def : InstRW<[WriteIALU], (instrs COPY)>; + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types with cycles. +// Dummy definitions for RocketCore.
+def : ReadAdvance<ReadJmp, 0>; +def : ReadAdvance<ReadJalr, 0>; +def : ReadAdvance<ReadCSR, 0>; +def : ReadAdvance<ReadStoreData, 0>; +def : ReadAdvance<ReadMemBase, 0>; +def : ReadAdvance<ReadIALU, 0>; +def : ReadAdvance<ReadIALU32, 0>; +def : ReadAdvance<ReadShift, 0>; +def : ReadAdvance<ReadShift32, 0>; +def : ReadAdvance<ReadIDiv, 0>; +def : ReadAdvance<ReadIDiv32, 0>; +def : ReadAdvance<ReadIMul, 0>; +def : ReadAdvance<ReadIMul32, 0>; +def : ReadAdvance<ReadAtomicWA, 0>; +def : ReadAdvance<ReadAtomicWD, 0>; +def : ReadAdvance<ReadAtomicDA, 0>; +def : ReadAdvance<ReadAtomicDD, 0>; +def : ReadAdvance<ReadAtomicLDW, 0>; +def : ReadAdvance<ReadAtomicLDD, 0>; +def : ReadAdvance<ReadAtomicSTW, 0>; +def : ReadAdvance<ReadAtomicSTD, 0>; +def : ReadAdvance<ReadFALU32, 0>; +def : ReadAdvance<ReadFALU64, 0>; +def : ReadAdvance<ReadFMul32, 0>; +def : ReadAdvance<ReadFMulAdd32, 0>; +def : ReadAdvance<ReadFMulSub32, 0>; +def : ReadAdvance<ReadFMul64, 0>; +def : ReadAdvance<ReadFMulAdd64, 0>; +def : ReadAdvance<ReadFMulSub64, 0>; +def : ReadAdvance<ReadFDiv32, 0>; +def : ReadAdvance<ReadFDiv64, 0>; +def : ReadAdvance<ReadFSqrt32, 0>; +def : ReadAdvance<ReadFSqrt64, 0>; +def : ReadAdvance<ReadFCmp32, 0>; +def : ReadAdvance<ReadFCmp64, 0>; +def : ReadAdvance<ReadFCvtF32ToI32, 0>; +def : ReadAdvance<ReadFCvtF32ToI64, 0>; +def : ReadAdvance<ReadFCvtF64ToI32, 0>; +def : ReadAdvance<ReadFCvtF64ToI64, 0>; +def : ReadAdvance<ReadFCvtI32ToF32, 0>; +def : ReadAdvance<ReadFCvtI32ToF64, 0>; +def : ReadAdvance<ReadFCvtI64ToF32, 0>; +def : ReadAdvance<ReadFCvtI64ToF64, 0>; +def : ReadAdvance<ReadFCvtF32ToF64, 0>; +def : ReadAdvance<ReadFCvtF64ToF32, 0>; +def : ReadAdvance<ReadFMovF32ToI32, 0>; +def : ReadAdvance<ReadFMovI32ToF32, 0>; +def : ReadAdvance<ReadFMovF64ToI64, 0>; +def : ReadAdvance<ReadFMovI64ToF64, 0>; +def : ReadAdvance<ReadFClass32, 0>; +def : ReadAdvance<ReadFClass64, 0>; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td new 
file mode 100644 index 000000000000..9e2762a5d171 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -0,0 +1,138 @@ +//===-- RISCVSchedule.td - RISCV Scheduling Definitions -------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// Define scheduler resources associated with def operands. +def WriteIALU : SchedWrite; // 32 or 64-bit integer ALU operations +def WriteIALU32 : SchedWrite; // 32-bit integer ALU operations on RV64I +def WriteShift32 : SchedWrite; // 32-bit shift operations on RV64Ix +def WriteShift : SchedWrite; // 32 or 64-bit shift operations +def WriteIDiv : SchedWrite; // 32-bit or 64-bit divide and remainder +def WriteIDiv32 : SchedWrite; // 32-bit divide and remainder on RV64I +def WriteIMul : SchedWrite; // 32-bit or 64-bit multiply +def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I +def WriteJmp : SchedWrite; // Jump +def WriteJal : SchedWrite; // Jump and link +def WriteJalr : SchedWrite; // Jump and link register +def WriteJmpReg : SchedWrite; // Jump register +def WriteNop : SchedWrite; +def WriteLDB : SchedWrite; // Load byte +def WriteLDH : SchedWrite; // Load half-word +def WriteLDW : SchedWrite; // Load word +def WriteLDWU : SchedWrite; // Load word unsigned +def WriteLDD : SchedWrite; // Load double-word +def WriteCSR : SchedWrite; // CSR instructions +def WriteSTB : SchedWrite; // Store byte +def WriteSTH : SchedWrite; // Store half-word +def WriteSTW : SchedWrite; // Store word +def WriteSTD : SchedWrite; // Store double-word +def WriteAtomicW : SchedWrite; //Atomic memory operation word size +def WriteAtomicD : SchedWrite; //Atomic memory operation double word size +def WriteAtomicLDW : SchedWrite; // Atomic load word +def WriteAtomicLDD : SchedWrite; 
// Atomic load double word +def WriteAtomicSTW : SchedWrite; // Atomic store word +def WriteAtomicSTD : SchedWrite; // Atomic store double word +def WriteFALU32 : SchedWrite; // FP 32-bit computation +def WriteFALU64 : SchedWrite; // FP 64-bit computation +def WriteFMul32 : SchedWrite; // 32-bit floating point multiply +def WriteFMulAdd32 : SchedWrite; // 32-bit floating point multiply add +def WriteFMulSub32 : SchedWrite; // 32-bit floating point multiply sub +def WriteFMul64 : SchedWrite; // 64-bit floating point multiply +def WriteFMulAdd64 : SchedWrite; // 64-bit floating point multiply add +def WriteFMulSub64 : SchedWrite; // 64-bit floating point multiply sub +def WriteFDiv32 : SchedWrite; // 32-bit floating point divide +def WriteFDiv64 : SchedWrite; // 64-bit floating point divide +def WriteFSqrt32 : SchedWrite; // 32-bit floating point sqrt +def WriteFSqrt64 : SchedWrite; // 64-bit floating point sqrt + +// Integer to float conversions +def WriteFCvtI32ToF32 : SchedWrite; +def WriteFCvtI32ToF64 : SchedWrite; +def WriteFCvtI64ToF32 : SchedWrite; // RV64I only +def WriteFCvtI64ToF64 : SchedWrite; // RV64I only + +//Float to integer conversions +def WriteFCvtF32ToI32 : SchedWrite; +def WriteFCvtF32ToI64 : SchedWrite; // RV64I only +def WriteFCvtF64ToI32 : SchedWrite; +def WriteFCvtF64ToI64 : SchedWrite; // RV64I only + +// Float to float conversions +def WriteFCvtF32ToF64 : SchedWrite; +def WriteFCvtF64ToF32 : SchedWrite; + +def WriteFConv32 : SchedWrite; // 32-bit floating point convert +def WriteFConv64 : SchedWrite; // 64-bit floating point convert +def WriteFClass32 : SchedWrite; // 32-bit floating point classify +def WriteFClass64 : SchedWrite; // 64-bit floating point classify +def WriteFCmp32 : SchedWrite; // 32-bit floating point compare +def WriteFCmp64 : SchedWrite; // 64-bit floating point compare + +def WriteFMovF32ToI32 : SchedWrite; +def WriteFMovI32ToF32 : SchedWrite; +def WriteFMovF64ToI64 : SchedWrite; // RV64I only +def WriteFMovI64ToF64 : 
SchedWrite; // RV64I only + +def WriteFMov32 : SchedWrite; // 32-bit floating point move +def WriteFMov64 : SchedWrite; // 64-bit floating point move +def WriteFLD32 : SchedWrite; // Floating point sp load +def WriteFLD64 : SchedWrite; // Floating point dp load +def WriteFST32 : SchedWrite; // Floating point sp store +def WriteFST64 : SchedWrite; // Floating point dp store + +/// Define scheduler resources associated with use operands. +def ReadJmp : SchedRead; +def ReadJalr : SchedRead; +def ReadCSR : SchedRead; +def ReadMemBase : SchedRead; +def ReadStoreData : SchedRead; +def ReadIALU : SchedRead; +def ReadIALU32 : SchedRead; // 32-bit integer ALU operations on RV64I +def ReadShift : SchedRead; +def ReadShift32 : SchedRead; // 32-bit shift operations on RV64Ix +def ReadIDiv : SchedRead; +def ReadIDiv32 : SchedRead; +def ReadIMul : SchedRead; +def ReadIMul32 : SchedRead; +def ReadAtomicWA : SchedRead; +def ReadAtomicWD : SchedRead; +def ReadAtomicDA : SchedRead; +def ReadAtomicDD : SchedRead; +def ReadAtomicLDW : SchedRead; // Atomic load word +def ReadAtomicLDD : SchedRead; // Atomic load double word +def ReadAtomicSTW : SchedRead; // Atomic store word +def ReadAtomicSTD : SchedRead; // Atomic store double word +def ReadFALU32 : SchedRead; // FP 32-bit computation +def ReadFALU64 : SchedRead; // FP 64-bit computation +def ReadFMul32 : SchedRead; // 32-bit floating point multiply +def ReadFMulAdd32 : SchedRead; // 32-bit floating point multiply add +def ReadFMulSub32 : SchedRead; // 32-bit floating point multiply sub +def ReadFMul64 : SchedRead; // 64-bit floating point multiply +def ReadFMulAdd64 : SchedRead; // 64-bit floating point multiply add +def ReadFMulSub64 : SchedRead; // 64-bit floating point multiply sub +def ReadFDiv32 : SchedRead; // 32-bit floating point divide +def ReadFDiv64 : SchedRead; // 64-bit floating point divide +def ReadFSqrt32 : SchedRead; // 32-bit floating point sqrt +def ReadFSqrt64 : SchedRead; // 64-bit floating point sqrt +def 
ReadFCmp32 : SchedRead; +def ReadFCmp64 : SchedRead; +def ReadFCvtF32ToI32 : SchedRead; +def ReadFCvtF32ToI64 : SchedRead; +def ReadFCvtF64ToI32 : SchedRead; +def ReadFCvtF64ToI64 : SchedRead; +def ReadFCvtI32ToF32 : SchedRead; +def ReadFCvtI32ToF64 : SchedRead; +def ReadFCvtI64ToF32 : SchedRead; +def ReadFCvtI64ToF64 : SchedRead; +def ReadFMovF32ToI32 : SchedRead; +def ReadFMovI32ToF32 : SchedRead; +def ReadFMovF64ToI64 : SchedRead; +def ReadFMovI64ToF64 : SchedRead; +def ReadFCvtF32ToF64 : SchedRead; +def ReadFCvtF64ToF32 : SchedRead; +def ReadFClass32 : SchedRead; +def ReadFClass64 : SchedRead; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 2bb26988c7da..de71c01753de 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -15,6 +15,7 @@ #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" +#include "Utils/RISCVBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" @@ -89,8 +90,17 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const { // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, - Options.MCOptions.getABIName(), *this); + auto ABIName = Options.MCOptions.getABIName(); + if (const MDString *ModuleTargetABI = dyn_cast_or_null<MDString>( + F.getParent()->getModuleFlag("target-abi"))) { + auto TargetABI = RISCVABI::getTargetABI(ABIName); + if (TargetABI != RISCVABI::ABI_Unknown && + ModuleTargetABI->getString() != ABIName) { + report_fatal_error("-target-abi option != target-abi module flag"); + } + ABIName = ModuleTargetABI->getString(); + } + I = std::make_unique<RISCVSubtarget>(TargetTriple, CPU, FS, ABIName, *this); } return I.get(); } diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp index 432ebb294d46..43b1f8b80c5f 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.cpp @@ -12,16 +12,7 @@ namespace RISCVSysReg { namespace RISCVABI { ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName) { - auto TargetABI = StringSwitch<ABI>(ABIName) - .Case("ilp32", ABI_ILP32) - .Case("ilp32f", ABI_ILP32F) - .Case("ilp32d", ABI_ILP32D) - .Case("ilp32e", ABI_ILP32E) - .Case("lp64", ABI_LP64) - .Case("lp64f", ABI_LP64F) - .Case("lp64d", ABI_LP64D) - .Default(ABI_Unknown); - + auto TargetABI = getTargetABI(ABIName); bool IsRV64 = TT.isArch64Bit(); bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; @@ -58,6 +49,19 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, return ABI_ILP32; } +ABI getTargetABI(StringRef ABIName) { + auto TargetABI = StringSwitch<ABI>(ABIName) + .Case("ilp32", ABI_ILP32) + .Case("ilp32f", ABI_ILP32F) + .Case("ilp32d", ABI_ILP32D) + .Case("ilp32e", ABI_ILP32E) + .Case("lp64", ABI_LP64) + .Case("lp64f", ABI_LP64F) + .Case("lp64d", ABI_LP64D) + .Default(ABI_Unknown); + return TargetABI; +} + // To avoid the BP value clobbered by a function call, we need to choose a // callee saved register to save 
the value. RV32E only has X8 and X9 as callee // saved registers and X8 will be used as fp. So we choose X9 as bp. diff --git a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h index cf078df9609a..d36c528bba1e 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVBaseInfo.h @@ -202,6 +202,8 @@ enum ABI { ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName); +ABI getTargetABI(StringRef ABIName); + // Returns the register used to hold the stack pointer after realignment. Register getBPReg(); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index c73905d3357a..ab00069497af 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -6859,8 +6859,6 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, for (MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); NextMIIt != MBB->end(); ++NextMIIt) { - if (NextMIIt->definesRegister(SystemZ::CC)) - break; if (isSelectPseudo(*NextMIIt)) { assert(NextMIIt->getOperand(3).getImm() == CCValid && "Bad CCValid operands since CC was not redefined."); @@ -6871,6 +6869,9 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI, } break; } + if (NextMIIt->definesRegister(SystemZ::CC) || + NextMIIt->usesCustomInsertionHook()) + break; bool User = false; for (auto SelMI : Selects) if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index d1f3acbd221e..3e905c18fa3b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -751,6 +751,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { auto *II = 
dyn_cast<InvokeInst>(BB.getTerminator()); if (!II) continue; + Changed = true; LandingPads.insert(II->getLandingPadInst()); IRB.SetInsertPoint(II); @@ -791,6 +792,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { auto *RI = dyn_cast<ResumeInst>(&I); if (!RI) continue; + Changed = true; // Split the input into legal values Value *Input = RI->getValue(); @@ -815,6 +817,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { continue; if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) continue; + Changed = true; IRB.SetInsertPoint(CI); CallInst *NewCI = @@ -830,7 +833,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { if (auto *LPI = dyn_cast<LandingPadInst>(I)) LandingPads.insert(LPI); } - Changed = !LandingPads.empty(); + Changed |= !LandingPads.empty(); // Handle all the landingpad for this function together, as multiple invokes // may share a single lp diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index dffda5217675..2284cd7a70b8 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -85,13 +85,13 @@ cl::opt<unsigned> X86AlignBranchBoundary( cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( "x86-align-branch", - cl::desc("Specify types of branches to align (plus separated list of " - "types). The branches's types are combination of jcc, fused, " - "jmp, call, ret, indirect."), - cl::value_desc("jcc indicates conditional jumps, fused indicates fused " - "conditional jumps, jmp indicates unconditional jumps, call " - "indicates direct and indirect calls, ret indicates rets, " - "indirect indicates indirect jumps."), + cl::desc( + "Specify types of branches to align. The branches's types are " + "combination of jcc, fused, jmp, call, ret, indirect. 
jcc indicates " + "conditional jumps, fused indicates fused conditional jumps, jmp " + "indicates unconditional jumps, call indicates direct and indirect " + "calls, ret indicates rets, indirect indicates indirect jumps."), + cl::value_desc("(plus separated list of types)"), cl::location(X86AlignBranchKindLoc)); cl::opt<bool> X86AlignBranchWithin32BBoundaries( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0f152968ddfd..cbdd7135de43 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21056,7 +21056,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, // Divide by pow2. SDValue SRA = - DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i64)); + DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8)); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 2fc9a2af01d7..7f49c6e861d4 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -2002,6 +2002,25 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { break; } + case X86::ENDBR32: + case X86::ENDBR64: { + // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for + // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be + // non-empty. If MI is the initial ENDBR, place the + // __patchable_function_entries label after ENDBR. + if (CurrentPatchableFunctionEntrySym && + CurrentPatchableFunctionEntrySym == CurrentFnBegin && + MI == &MF->front().front()) { + MCInst Inst; + MCInstLowering.Lower(MI, Inst); + EmitAndCountInstruction(Inst); + CurrentPatchableFunctionEntrySym = createTempSymbol("patch"); + OutStreamer->EmitLabel(CurrentPatchableFunctionEntrySym); + return; + } + break; + } + case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: |