src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2019-10-23 17:51:42 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-10-23 17:51:42 +0000
commit	1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree	2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /include/llvm/CodeGen/ModuloSchedule.h
parent	e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)

vendor/llvm/llvm-trunk-r375505 vendor/llvm

Notes

Diffstat (limited to 'include/llvm/CodeGen/ModuloSchedule.h')

-rw-r--r--

include/llvm/CodeGen/ModuloSchedule.h

367

1 files changed, 367 insertions, 0 deletions

diff --git a/include/llvm/CodeGen/ModuloSchedule.h b/include/llvm/CodeGen/ModuloSchedule.h
new file mode 100644
index 000000000000..81a9b63b64ca
--- /dev/null
+++ b/include/llvm/CodeGen/ModuloSchedule.h

@@ -0,0 +1,367 @@

+//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//

+//

+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

+// See https://llvm.org/LICENSE.txt for license information.

+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

+//

+//===----------------------------------------------------------------------===//

+//

+// Software pipelining (SWP) is an instruction scheduling technique for loops

+// that overlaps loop iterations and exploits ILP via compiler transformations.

+//

+// There are multiple methods for analyzing a loop and creating a schedule.

+// An example algorithm is Swing Modulo Scheduling (implemented by the

+// MachinePipeliner). The details of how a schedule is arrived at are irrelevant

+// for the task of actually rewriting a loop to adhere to the schedule, which

+// is what this file does.

+//

+// A schedule is, for every instruction in a block, a Cycle and a Stage. Note

+// that we only support single-block loops, so "block" and "loop" can be used

+// interchangably.

+//

+// The Cycle of an instruction defines a partial order of the instructions in

+// the remapped loop. Instructions within a cycle must not consume the output

+// of any instruction in the same cycle. Cycle information is assumed to have

+// been calculated such that the processor will execute instructions in

+// lock-step (for example in a VLIW ISA).

+//

+// The Stage of an instruction defines the mapping between logical loop

+// iterations and pipelined loop iterations. An example (unrolled) pipeline

+// may look something like:

+//

+// I0[0] Execute instruction I0 of iteration 0

+// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1

+// I1[1], I0[2]

+// I1[2], I0[3]

+//

+// In the schedule for this unrolled sequence we would say that I0 was scheduled

+// in stage 0 and I1 in stage 1:

+//

+// loop:

+// [stage 0] x = I0

+// [stage 1] I1 x (from stage 0)

+//

+// And to actually generate valid code we must insert a phi:

+//

+// loop:

+// x' = phi(x)

+// x = I0

+// I1 x'

+//

+// This is a simple example; the rules for how to generate correct code given

+// an arbitrary schedule containing loop-carried values are complex.

+//

+// Note that these examples only mention the steady-state kernel of the

+// generated loop; prologs and epilogs must be generated also that prime and

+// flush the pipeline. Doing so is nontrivial.

+//

+//===----------------------------------------------------------------------===//

+#ifndef LLVM_LIB_CODEGEN_MODULOSCHEDULE_H

+#define LLVM_LIB_CODEGEN_MODULOSCHEDULE_H

+#include "llvm/CodeGen/MachineFunction.h"

+#include "llvm/CodeGen/MachineLoopInfo.h"

+#include "llvm/CodeGen/MachineLoopUtils.h"

+#include "llvm/CodeGen/TargetInstrInfo.h"

+#include "llvm/CodeGen/TargetSubtargetInfo.h"

+#include <deque>

+#include <vector>

+namespace llvm {

+class MachineBasicBlock;

+class MachineInstr;

+class LiveIntervals;

+/// Represents a schedule for a single-block loop. For every instruction we

+/// maintain a Cycle and Stage.

+class ModuloSchedule {

+private:

+ /// The block containing the loop instructions.

+ MachineLoop *Loop;

+ /// The instructions to be generated, in total order. Cycle provides a partial

+ /// order; the total order within cycles has been decided by the schedule

+ /// producer.

+ std::vector<MachineInstr *> ScheduledInstrs;

+ /// The cycle for each instruction.

+ DenseMap<MachineInstr *, int> Cycle;

+ /// The stage for each instruction.

+ DenseMap<MachineInstr *, int> Stage;

+ /// The number of stages in this schedule (Max(Stage) + 1).

+ int NumStages;

+public:

+ /// Create a new ModuloSchedule.

+ /// \arg ScheduledInstrs The new loop instructions, in total resequenced

+ /// order.

+ /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does

+ /// not need to start at zero. ScheduledInstrs must be partially ordered by

+ /// Cycle.

+ /// \arg Stage Stage index for all instructions in ScheduleInstrs.

+ ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,

+ std::vector<MachineInstr *> ScheduledInstrs,

+ DenseMap<MachineInstr *, int> Cycle,

+ DenseMap<MachineInstr *, int> Stage)

+ : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),

+ Stage(std::move(Stage)) {

+ NumStages = 0;

+ for (auto &KV : this->Stage)

+ NumStages = std::max(NumStages, KV.second);

+ ++NumStages;

+ }

+ /// Return the single-block loop being scheduled.

+ MachineLoop *getLoop() const { return Loop; }

+ /// Return the number of stages contained in this schedule, which is the

+ /// largest stage index + 1.

+ int getNumStages() const { return NumStages; }

+ /// Return the first cycle in the schedule, which is the cycle index of the

+ /// first instruction.

+ int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }

+ /// Return the final cycle in the schedule, which is the cycle index of the

+ /// last instruction.

+ int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }

+ /// Return the stage that MI is scheduled in, or -1.

+ int getStage(MachineInstr *MI) {

+ auto I = Stage.find(MI);

+ return I == Stage.end() ? -1 : I->second;

+ }

+ /// Return the cycle that MI is scheduled at, or -1.

+ int getCycle(MachineInstr *MI) {

+ auto I = Cycle.find(MI);

+ return I == Cycle.end() ? -1 : I->second;

+ }

+ /// Return the rescheduled instructions in order.

+ ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }

+ void dump() { print(dbgs()); }

+ void print(raw_ostream &OS);

+};

+/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,

+/// rewriting the old loop and inserting prologs and epilogs as required.

+class ModuloScheduleExpander {

+public:

+ using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;

+private:

+ using ValueMapTy = DenseMap<unsigned, unsigned>;

+ using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;

+ using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;

+ ModuloSchedule &Schedule;

+ MachineFunction &MF;

+ const TargetSubtargetInfo &ST;

+ MachineRegisterInfo &MRI;

+ const TargetInstrInfo *TII;

+ LiveIntervals &LIS;

+ MachineBasicBlock *BB;

+ MachineBasicBlock *Preheader;

+ MachineBasicBlock *NewKernel = nullptr;

+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;

+ /// Map for each register and the max difference between its uses and def.

+ /// The first element in the pair is the max difference in stages. The

+ /// second is true if the register defines a Phi value and loop value is

+ /// scheduled before the Phi.

+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;

+ /// Instructions to change when emitting the final schedule.

+ InstrChangesTy InstrChanges;

+ void generatePipelinedLoop();

+ void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,

+ ValueMapTy *VRMap, MBBVectorTy &PrologBBs);

+ void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,

+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,

+ MBBVectorTy &PrologBBs);

+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,

+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,

+ ValueMapTy *VRMap, InstrMapTy &InstrMap,

+ unsigned LastStageNum, unsigned CurStageNum,

+ bool IsLast);

+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,

+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,

+ ValueMapTy *VRMap, InstrMapTy &InstrMap,

+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast);

+ void removeDeadInstructions(MachineBasicBlock *KernelBB,

+ MBBVectorTy &EpilogBBs);

+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);

+ void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,

+ MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,

+ ValueMapTy *VRMap);

+ bool computeDelta(MachineInstr &MI, unsigned &Delta);

+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,

+ unsigned Num);

+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,

+ unsigned InstStageNum);

+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,

+ unsigned InstStageNum);

+ void updateInstruction(MachineInstr *NewMI, bool LastDef,

+ unsigned CurStageNum, unsigned InstrStageNum,

+ ValueMapTy *VRMap);

+ MachineInstr *findDefInLoop(unsigned Reg);

+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,

+ unsigned LoopStage, ValueMapTy *VRMap,

+ MachineBasicBlock *BB);

+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,

+ ValueMapTy *VRMap, InstrMapTy &InstrMap);

+ void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,

+ unsigned CurStageNum, unsigned PhiNum,

+ MachineInstr *Phi, unsigned OldReg,

+ unsigned NewReg, unsigned PrevReg = 0);

+ bool isLoopCarried(MachineInstr &Phi);

+ /// Return the max. number of stages/iterations that can occur between a

+ /// register definition and its uses.

+ unsigned getStagesForReg(int Reg, unsigned CurStage) {

+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];

+ if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&

+ Stages.second)

+ return 1;

+ return Stages.first;

+ }

+ /// The number of stages for a Phi is a little different than other

+ /// instructions. The minimum value computed in RegToStageDiff is 1

+ /// because we assume the Phi is needed for at least 1 iteration.

+ /// This is not the case if the loop value is scheduled prior to the

+ /// Phi in the same stage. This function returns the number of stages

+ /// or iterations needed between the Phi definition and any uses.

+ unsigned getStagesForPhi(int Reg) {

+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];

+ if (Stages.second)

+ return Stages.first;

+ return Stages.first - 1;

+ }

+public:

+ /// Create a new ModuloScheduleExpander.

+ /// \arg InstrChanges Modifications to make to instructions with memory

+ /// operands.

+ /// FIXME: InstrChanges is opaque and is an implementation detail of an

+ /// optimization in MachinePipeliner that crosses abstraction boundaries.

+ ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,

+ LiveIntervals &LIS, InstrChangesTy InstrChanges)

+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),

+ TII(ST.getInstrInfo()), LIS(LIS),

+ InstrChanges(std::move(InstrChanges)) {}

+ /// Performs the actual expansion.

+ void expand();

+ /// Performs final cleanup after expansion.

+ void cleanup();

+ /// Returns the newly rewritten kernel block, or nullptr if this was

+ /// optimized away.

+ MachineBasicBlock *getRewrittenKernel() { return NewKernel; }

+};

+/// A reimplementation of ModuloScheduleExpander. It works by generating a

+/// standalone kernel loop and peeling out the prologs and epilogs.

+class PeelingModuloScheduleExpander {

+ ModuloSchedule &Schedule;

+ MachineFunction &MF;

+ const TargetSubtargetInfo &ST;

+ MachineRegisterInfo &MRI;

+ const TargetInstrInfo *TII;

+ LiveIntervals *LIS;

+ /// The original loop block that gets rewritten in-place.

+ MachineBasicBlock *BB;

+ /// The original loop preheader.

+ MachineBasicBlock *Preheader;

+ /// All prolog and epilog blocks.

+ SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;

+ /// For every block, the stages that are produced.

+ DenseMap<MachineBasicBlock *, BitVector> LiveStages;

+ /// For every block, the stages that are available. A stage can be available

+ /// but not produced (in the epilog) or produced but not available (in the

+ /// prolog).

+ DenseMap<MachineBasicBlock *, BitVector> AvailableStages;

+ /// CanonicalMIs and BlockMIs form a bidirectional map between any of the

+ /// loop kernel clones.

+ DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;

+ DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>

+ BlockMIs;

+ /// State passed from peelKernel to peelPrologAndEpilogs().

+ std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;

+public:

+ PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,

+ LiveIntervals *LIS)

+ : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),

+ TII(ST.getInstrInfo()), LIS(LIS) {}

+ void expand();

+ /// Runs ModuloScheduleExpander and treats it as a golden input to validate

+ /// aspects of the code generated by PeelingModuloScheduleExpander.

+ void validateAgainstModuloScheduleExpander();

+protected:

+ /// Converts BB from the original loop body to the rewritten, pipelined

+ /// steady-state.

+ void rewriteKernel();

+private:

+ /// Peels one iteration of the rewritten kernel (BB) in the specified

+ /// direction.

+ MachineBasicBlock *peelKernel(LoopPeelDirection LPD);

+ /// Peel the kernel forwards and backwards to produce prologs and epilogs,

+ /// and stitch them together.

+ void peelPrologAndEpilogs();

+ /// All prolog and epilog blocks are clones of the kernel, so any produced

+ /// register in one block has an corollary in all other blocks.

+ Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);

+ /// Change all users of MI, if MI is predicated out

+ /// (LiveStages[MI->getParent()] == false).

+ void rewriteUsesOf(MachineInstr *MI);

+ /// Insert branches between prologs, kernel and epilogs.

+ void fixupBranches();

+ /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block

+ /// to a block dominated by all prologs and epilogs. This allows us to treat

+ /// the loop exiting block as any other kernel clone.

+ MachineBasicBlock *CreateLCSSAExitingBlock();

+ /// Helper to get the stage of an instruction in the schedule.

+ unsigned getStage(MachineInstr *MI) {

+ if (CanonicalMIs.count(MI))

+ MI = CanonicalMIs[MI];

+ return Schedule.getStage(MI);

+ }

+};

+/// Expander that simply annotates each scheduled instruction with a post-instr

+/// symbol that can be consumed by the ModuloScheduleTest pass.

+///

+/// The post-instr symbol is a way of annotating an instruction that can be

+/// roundtripped in MIR. The syntax is:

+/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>

+class ModuloScheduleTestAnnotater {

+ MachineFunction &MF;

+ ModuloSchedule &S;

+public:

+ ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)

+ : MF(MF), S(S) {}

+ /// Performs the annotation.

+ void annotate();

+};

+} // end namespace llvm

+#endif // LLVM_LIB_CODEGEN_MODULOSCHEDULE_H