aboutsummaryrefslogtreecommitdiff
path: root/include/llvm/CodeGen/ModuloSchedule.h
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-10-23 17:51:42 +0000
commit1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /include/llvm/CodeGen/ModuloSchedule.h
parente6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Notes
Diffstat (limited to 'include/llvm/CodeGen/ModuloSchedule.h')
-rw-r--r--include/llvm/CodeGen/ModuloSchedule.h367
1 files changed, 367 insertions, 0 deletions
diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h
new file mode 100644
index 000000000000..81a9b63b64ca
--- /dev/null
+++ b/include/llvm/CodeGen/ModuloSchedule.h
@@ -0,0 +1,367 @@
+//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via compiler transformations.
+//
+// There are multiple methods for analyzing a loop and creating a schedule.
+// An example algorithm is Swing Modulo Scheduling (implemented by the
+// MachinePipeliner). The details of how a schedule is arrived at are irrelevant
+// for the task of actually rewriting a loop to adhere to the schedule, which
+// is what this file does.
+//
+// A schedule is, for every instruction in a block, a Cycle and a Stage. Note
+// that we only support single-block loops, so "block" and "loop" can be used
+// interchangably.
+//
+// The Cycle of an instruction defines a partial order of the instructions in
+// the remapped loop. Instructions within a cycle must not consume the output
+// of any instruction in the same cycle. Cycle information is assumed to have
+// been calculated such that the processor will execute instructions in
+// lock-step (for example in a VLIW ISA).
+//
+// The Stage of an instruction defines the mapping between logical loop
+// iterations and pipelined loop iterations. An example (unrolled) pipeline
+// may look something like:
+//
+// I0[0] Execute instruction I0 of iteration 0
+// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1
+// I1[1], I0[2]
+// I1[2], I0[3]
+//
+// In the schedule for this unrolled sequence we would say that I0 was scheduled
+// in stage 0 and I1 in stage 1:
+//
+// loop:
+// [stage 0] x = I0
+// [stage 1] I1 x (from stage 0)
+//
+// And to actually generate valid code we must insert a phi:
+//
+// loop:
+// x' = phi(x)
+// x = I0
+// I1 x'
+//
+// This is a simple example; the rules for how to generate correct code given
+// an arbitrary schedule containing loop-carried values are complex.
+//
+// Note that these examples only mention the steady-state kernel of the
+// generated loop; prologs and epilogs must be generated also that prime and
+// flush the pipeline. Doing so is nontrivial.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <deque>
+#include <vector>
+
+namespace llvm {
+class MachineBasicBlock;
+class MachineInstr;
+class LiveIntervals;
+
+/// Represents a schedule for a single-block loop. For every instruction we
+/// maintain a Cycle and Stage.
+class ModuloSchedule {
+private:
+ /// The block containing the loop instructions.
+ MachineLoop *Loop;
+
+ /// The instructions to be generated, in total order. Cycle provides a partial
+ /// order; the total order within cycles has been decided by the schedule
+ /// producer.
+ std::vector<MachineInstr *> ScheduledInstrs;
+
+ /// The cycle for each instruction.
+ DenseMap<MachineInstr *, int> Cycle;
+
+ /// The stage for each instruction.
+ DenseMap<MachineInstr *, int> Stage;
+
+ /// The number of stages in this schedule (Max(Stage) + 1).
+ int NumStages;
+
+public:
+ /// Create a new ModuloSchedule.
+ /// \arg ScheduledInstrs The new loop instructions, in total resequenced
+ /// order.
+ /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
+ /// not need to start at zero. ScheduledInstrs must be partially ordered by
+ /// Cycle.
+ /// \arg Stage Stage index for all instructions in ScheduleInstrs.
+ ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
+ std::vector<MachineInstr *> ScheduledInstrs,
+ DenseMap<MachineInstr *, int> Cycle,
+ DenseMap<MachineInstr *, int> Stage)
+ : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
+ Stage(std::move(Stage)) {
+ NumStages = 0;
+ for (auto &KV : this->Stage)
+ NumStages = std::max(NumStages, KV.second);
+ ++NumStages;
+ }
+
+ /// Return the single-block loop being scheduled.
+ MachineLoop *getLoop() const { return Loop; }
+
+ /// Return the number of stages contained in this schedule, which is the
+ /// largest stage index + 1.
+ int getNumStages() const { return NumStages; }
+
+ /// Return the first cycle in the schedule, which is the cycle index of the
+ /// first instruction.
+ int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
+
+ /// Return the final cycle in the schedule, which is the cycle index of the
+ /// last instruction.
+ int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
+
+ /// Return the stage that MI is scheduled in, or -1.
+ int getStage(MachineInstr *MI) {
+ auto I = Stage.find(MI);
+ return I == Stage.end() ? -1 : I->second;
+ }
+
+ /// Return the cycle that MI is scheduled at, or -1.
+ int getCycle(MachineInstr *MI) {
+ auto I = Cycle.find(MI);
+ return I == Cycle.end() ? -1 : I->second;
+ }
+
+ /// Return the rescheduled instructions in order.
+ ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
+
+ void dump() { print(dbgs()); }
+ void print(raw_ostream &OS);
+};
+
+/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
+/// rewriting the old loop and inserting prologs and epilogs as required.
+class ModuloScheduleExpander {
+public:
+ using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
+
+private:
+ using ValueMapTy = DenseMap<unsigned, unsigned>;
+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
+
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals &LIS;
+
+ MachineBasicBlock *BB;
+ MachineBasicBlock *Preheader;
+ MachineBasicBlock *NewKernel = nullptr;
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
+
+ /// Map for each register and the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and loop value is
+ /// scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Instructions to change when emitting the final schedule.
+ InstrChangesTy InstrChanges;
+
+ void generatePipelinedLoop();
+ void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
+ void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
+ void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstrStageNum,
+ ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ ValueMapTy *VRMap, InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
+ unsigned CurStageNum, unsigned PhiNum,
+ MachineInstr *Phi, unsigned OldReg,
+ unsigned NewReg, unsigned PrevReg = 0);
+ bool isLoopCarried(MachineInstr &Phi);
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
+ Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different than other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+public:
+ /// Create a new ModuloScheduleExpander.
+ /// \arg InstrChanges Modifications to make to instructions with memory
+ /// operands.
+ /// FIXME: InstrChanges is opaque and is an implementation detail of an
+ /// optimization in MachinePipeliner that crosses abstraction boundaries.
+ ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals &LIS, InstrChangesTy InstrChanges)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS),
+ InstrChanges(std::move(InstrChanges)) {}
+
+ /// Performs the actual expansion.
+ void expand();
+ /// Performs final cleanup after expansion.
+ void cleanup();
+
+ /// Returns the newly rewritten kernel block, or nullptr if this was
+ /// optimized away.
+ MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
+};
+
+/// A reimplementation of ModuloScheduleExpander. It works by generating a
+/// standalone kernel loop and peeling out the prologs and epilogs.
+class PeelingModuloScheduleExpander {
+ ModuloSchedule &Schedule;
+ MachineFunction &MF;
+ const TargetSubtargetInfo &ST;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ /// The original loop block that gets rewritten in-place.
+ MachineBasicBlock *BB;
+ /// The original loop preheader.
+ MachineBasicBlock *Preheader;
+ /// All prolog and epilog blocks.
+ SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
+ /// For every block, the stages that are produced.
+ DenseMap<MachineBasicBlock *, BitVector> LiveStages;
+ /// For every block, the stages that are available. A stage can be available
+ /// but not produced (in the epilog) or produced but not available (in the
+ /// prolog).
+ DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
+
+ /// CanonicalMIs and BlockMIs form a bidirectional map between any of the
+ /// loop kernel clones.
+ DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
+ DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
+ BlockMIs;
+
+ /// State passed from peelKernel to peelPrologAndEpilogs().
+ std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
+
+public:
+ PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
+ LiveIntervals *LIS)
+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
+ TII(ST.getInstrInfo()), LIS(LIS) {}
+
+ void expand();
+
+ /// Runs ModuloScheduleExpander and treats it as a golden input to validate
+ /// aspects of the code generated by PeelingModuloScheduleExpander.
+ void validateAgainstModuloScheduleExpander();
+
+protected:
+ /// Converts BB from the original loop body to the rewritten, pipelined
+ /// steady-state.
+ void rewriteKernel();
+
+private:
+ /// Peels one iteration of the rewritten kernel (BB) in the specified
+ /// direction.
+ MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
+ /// Peel the kernel forwards and backwards to produce prologs and epilogs,
+ /// and stitch them together.
+ void peelPrologAndEpilogs();
+ /// All prolog and epilog blocks are clones of the kernel, so any produced
+ /// register in one block has an corollary in all other blocks.
+ Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
+ /// Change all users of MI, if MI is predicated out
+ /// (LiveStages[MI->getParent()] == false).
+ void rewriteUsesOf(MachineInstr *MI);
+ /// Insert branches between prologs, kernel and epilogs.
+ void fixupBranches();
+ /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
+ /// to a block dominated by all prologs and epilogs. This allows us to treat
+ /// the loop exiting block as any other kernel clone.
+ MachineBasicBlock *CreateLCSSAExitingBlock();
+ /// Helper to get the stage of an instruction in the schedule.
+ unsigned getStage(MachineInstr *MI) {
+ if (CanonicalMIs.count(MI))
+ MI = CanonicalMIs[MI];
+ return Schedule.getStage(MI);
+ }
+};
+
+/// Expander that simply annotates each scheduled instruction with a post-instr
+/// symbol that can be consumed by the ModuloScheduleTest pass.
+///
+/// The post-instr symbol is a way of annotating an instruction that can be
+/// roundtripped in MIR. The syntax is:
+/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
+class ModuloScheduleTestAnnotater {
+ MachineFunction &MF;
+ ModuloSchedule &S;
+
+public:
+ ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
+ : MF(MF), S(S) {}
+
+ /// Performs the annotation.
+ void annotate();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H