diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 119 |
1 files changed, 70 insertions, 49 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index 581168b31384..af9fef0720f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -116,6 +116,12 @@ void ModuloScheduleExpander::generatePipelinedLoop() { // a map between register names in the original block and the names created // in each stage of the pipelined loop. ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + + // The renaming destination by Phis for the registers across stages. + // This map is updated during Phis generation to point to the most recent + // renaming destination. + ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; SmallVector<MachineBasicBlock *, 4> PrologBBs; @@ -151,14 +157,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() { generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, - MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi, + InstrMap, MaxStageCount, MaxStageCount, false); LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); SmallVector<MachineBasicBlock *, 4> EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs, + PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -171,6 +178,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); delete[] VRMap; + delete[] VRMapPhi; } void ModuloScheduleExpander::cleanup() { @@ -242,7 +250,8 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, /// block for each stage that needs to complete. void ModuloScheduleExpander::generateEpilog( unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, - ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -296,8 +305,8 @@ void ModuloScheduleExpander::generateEpilog( } generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, - LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi, + InstrMap, LastStage, EpilogStage, i == 1); PredBB = NewBB; LLVM_DEBUG({ @@ -593,8 +602,9 @@ void ModuloScheduleExpander::generateExistingPhis( /// use in the pipelined sequence. void ModuloScheduleExpander::generatePhis( MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { // Compute the stage number that contains the initial Phi value, and // the Phi from the previous stage. unsigned PrologStage = 0; @@ -614,8 +624,7 @@ void ModuloScheduleExpander::generatePhis( BBI != BBE; ++BBI) { for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) { MachineOperand &MO = BBI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || - !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; int StageScheduled = Schedule.getStage(&*BBI); @@ -631,26 +640,49 @@ void ModuloScheduleExpander::generatePhis( if (!InKernel && (unsigned)StageScheduled > PrologStage) continue; - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + unsigned PhiOp2; + if (InKernel) { + PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + } // The number of Phis can't exceed the number of prolog stages. The // prolog stage number is zero based. if (NumPhis > PrologStage + 1 - StageScheduled) NumPhis = PrologStage + 1 - StageScheduled; for (unsigned np = 0; np < NumPhis; ++np) { + // Example for + // Org: + // %Org = ... (Scheduled at Stage#0, NumPhi = 2) + // + // Prolog0 (Stage0): + // %Clone0 = ... + // Prolog1 (Stage1): + // %Clone1 = ... + // Kernel (Stage2): + // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel + // %Clone2 = ... + // Epilog0 (Stage3): + // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel + // Epilog1 (Stage4): + // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0 + // + // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2} + // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0} + // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2} + unsigned PhiOp1 = VRMap[PrologStage][Def]; if (np <= PrologStage) PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + if (!InKernel) { + if (PrevStage == LastStageNum && np == 0) + PhiOp2 = VRMap[LastStageNum][Def]; + else + PhiOp2 = VRMapPhi[PrevStage - np][Def]; } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; const TargetRegisterClass *RC = MRI.getRegClass(Def); Register NewReg = MRI.createVirtualRegister(RC); @@ -672,9 +704,9 @@ void ModuloScheduleExpander::generatePhis( NewReg); PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; + VRMapPhi[PrevStage - np - 1][Def] = NewReg; } else { - VRMap[CurStageNum - np][Def] = NewReg; + VRMapPhi[CurStageNum - np][Def] = NewReg; if (np == NumPhis - 1) rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, NewReg); @@ -716,7 +748,7 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, continue; Register reg = MO.getReg(); // Assume physical registers are used, unless they are marked dead. - if (Register::isPhysicalRegister(reg)) { + if (reg.isPhysical()) { used = !MO.isDead(); if (used) break; @@ -847,7 +879,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, MachineBasicBlock *Epilog = EpilogBBs[i]; SmallVector<MachineOperand, 4> Cond; - Optional<bool> StaticallyGreater = + std::optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); unsigned numAdded = 0; if (!StaticallyGreater) { @@ -965,17 +997,6 @@ MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) { MachineInstr *NewMI = MF.CloneMachineInstr(OldMI); - // Check for tied operands in inline asm instructions. This should be handled - // elsewhere, but I'm not sure of the best solution. - if (OldMI->isInlineAsm()) - for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) { - const auto &MO = OldMI->getOperand(i); - if (MO.isReg() && MO.isUse()) - break; - unsigned UseIdx; - if (OldMI->isRegTiedToUseOperand(i, &UseIdx)) - NewMI->tieOperands(i, UseIdx); - } updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum); return NewMI; } @@ -1010,7 +1031,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, unsigned InstrStageNum, ValueMapTy *VRMap) { for (MachineOperand &MO : NewMI->operands()) { - if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + if (!MO.isReg() || !MO.getReg().isVirtual()) continue; Register reg = MO.getReg(); if (MO.isDef()) { @@ -1259,7 +1280,7 @@ class KernelRewriter { // Insert a phi that carries LoopReg from the loop body and InitReg otherwise. // If InitReg is not given it is chosen arbitrarily. It will either be undef // or will be chosen so as to share another phi. - Register phi(Register LoopReg, Optional<Register> InitReg = {}, + Register phi(Register LoopReg, std::optional<Register> InitReg = {}, const TargetRegisterClass *RC = nullptr); // Create an undef register of the given register class. Register undef(const TargetRegisterClass *RC); @@ -1367,7 +1388,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { // First, dive through the phi chain to find the defaults for the generated // phis. - SmallVector<Optional<Register>, 4> Defaults; + SmallVector<std::optional<Register>, 4> Defaults; Register LoopReg = Reg; auto LoopProducer = Producer; while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) { @@ -1378,7 +1399,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { } int LoopProducerStage = S.getStage(LoopProducer); - Optional<Register> IllegalPhiDefault; + std::optional<Register> IllegalPhiDefault; if (LoopProducerStage == -1) { // Do nothing. @@ -1410,9 +1431,9 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { // If we need more phis than we have defaults for, pad out with undefs for // the earliest phis, which are at the end of the defaults chain (the // chain is in reverse order). - Defaults.resize(Defaults.size() + StageDiff, Defaults.empty() - ? Optional<Register>() - : Defaults.back()); + Defaults.resize(Defaults.size() + StageDiff, + Defaults.empty() ? std::optional<Register>() + : Defaults.back()); } } @@ -1444,11 +1465,11 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { return LoopReg; } -Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, +Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg, const TargetRegisterClass *RC) { // If the init register is not undef, try and find an existing phi. if (InitReg) { - auto I = Phis.find({LoopReg, InitReg.value()}); + auto I = Phis.find({LoopReg, *InitReg}); if (I != Phis.end()) return I->second; } else { @@ -1469,10 +1490,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, return R; // Found a phi taking undef as input, so rewrite it to take InitReg. MachineInstr *MI = MRI.getVRegDef(R); - MI->getOperand(1).setReg(InitReg.value()); - Phis.insert({{LoopReg, InitReg.value()}, R}); + MI->getOperand(1).setReg(*InitReg); + Phis.insert({{LoopReg, *InitReg}, R}); const TargetRegisterClass *ConstrainRegClass = - MRI.constrainRegClass(R, MRI.getRegClass(InitReg.value())); + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); assert(ConstrainRegClass && "Expected a valid constrained register class!"); (void)ConstrainRegClass; UndefPhis.erase(I); @@ -1941,7 +1962,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { MachineBasicBlock *Epilog = *EI; SmallVector<MachineOperand, 4> Cond; TII->removeBranch(*Prolog); - Optional<bool> StaticallyGreater = + std::optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond); if (!StaticallyGreater) { LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); |