diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) | |
tree | 4def12e759965de927d963ac65840d663ef9d1ea /lib/CodeGen/StackColoring.cpp | |
parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) |
Notes
Diffstat (limited to 'lib/CodeGen/StackColoring.cpp')
-rw-r--r-- | lib/CodeGen/StackColoring.cpp | 619 |
1 files changed, 476 insertions, 143 deletions
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 7b5203815172..87cd470d5690 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -21,33 +21,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -67,18 +64,180 @@ DisableColoring("no-stack-coloring", /// The user may write code that uses allocas outside of the declared lifetime /// zone. This can happen when the user returns a reference to a local /// data-structure. We can detect these cases and decide not to optimize the -/// code. If this flag is enabled, we try to save the user. +/// code. If this flag is enabled, we try to save the user. This option +/// is treated as overriding LifetimeStartOnFirstUse below. static cl::opt<bool> ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken")); +/// Enable enhanced dataflow scheme for lifetime analysis (treat first +/// use of stack slot as start of slot lifetime, as opposed to looking +/// for LIFETIME_START marker). See "Implementation notes" below for +/// more info. +static cl::opt<bool> +LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", + cl::init(true), cl::Hidden, + cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); + + STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +// +// Implementation Notes: +// --------------------- +// +// Consider the following motivating example: +// +// int foo() { +// char b1[1024], b2[1024]; +// if (...) { +// char b3[1024]; +// <uses of b1, b3>; +// return x; +// } else { +// char b4[1024], b5[1024]; +// <uses of b2, b4, b5>; +// return y; +// } +// } +// +// In the code above, "b3" and "b4" are declared in distinct lexical +// scopes, meaning that it is easy to prove that they can share the +// same stack slot. Variables "b1" and "b2" are declared in the same +// scope, meaning that from a lexical point of view, their lifetimes +// overlap. From a control flow pointer of view, however, the two +// variables are accessed in disjoint regions of the CFG, thus it +// should be possible for them to share the same stack slot. An ideal +// stack allocation for the function above would look like: +// +// slot 0: b1, b2 +// slot 1: b3, b4 +// slot 2: b5 +// +// Achieving this allocation is tricky, however, due to the way +// lifetime markers are inserted. Here is a simplified view of the +// control flow graph for the code above: +// +// +------ block 0 -------+ +// 0| LIFETIME_START b1, b2 | +// 1| <test 'if' condition> | +// +-----------------------+ +// ./ \. +// +------ block 1 -------+ +------ block 2 -------+ +// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 | +// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> | +// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 | +// +-----------------------+ +-----------------------+ +// \. /. +// +------ block 3 -------+ +// 8| <cleanupcode> | +// 9| LIFETIME_END b1, b2 | +// 10| return | +// +-----------------------+ +// +// If we create live intervals for the variables above strictly based +// on the lifetime markers, we'll get the set of intervals on the +// left. If we ignore the lifetime start markers and instead treat a +// variable's lifetime as beginning with the first reference to the +// var, then we get the intervals on the right. +// +// LIFETIME_START First Use +// b1: [0,9] [3,4] [8,9] +// b2: [0,9] [6,9] +// b3: [2,4] [3,4] +// b4: [5,7] [6,7] +// b5: [5,7] [6,7] +// +// For the intervals on the left, the best we can do is overlap two +// variables (b3 and b4, for example); this gives us a stack size of +// 4*1024 bytes, not ideal. When treating first-use as the start of a +// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 +// byte stack (better). +// +// Relying entirely on first-use of stack slots is problematic, +// however, due to the fact that optimizations can sometimes migrate +// uses of a variable outside of its lifetime start/end region. Here +// is an example: +// +// int bar() { +// char b1[1024], b2[1024]; +// if (...) { +// <uses of b2> +// return y; +// } else { +// <uses of b1> +// while (...) { +// char b3[1024]; +// <uses of b3> +// } +// } +// } +// +// Before optimization, the control flow graph for the code above +// might look like the following: +// +// +------ block 0 -------+ +// 0| LIFETIME_START b1, b2 | +// 1| <test 'if' condition> | +// +-----------------------+ +// ./ \. +// +------ block 1 -------+ +------- block 2 -------+ +// 2| <uses of b2> | 3| <uses of b1> | +// +-----------------------+ +-----------------------+ +// | | +// | +------- block 3 -------+ <-\. +// | 4| <while condition> | | +// | +-----------------------+ | +// | / | | +// | / +------- block 4 -------+ +// \ / 5| LIFETIME_START b3 | | +// \ / 6| <uses of b3> | | +// \ / 7| LIFETIME_END b3 | | +// \ | +------------------------+ | +// \ | \ / +// +------ block 5 -----+ \--------------- +// 8| <cleanupcode> | +// 9| LIFETIME_END b1, b2 | +// 10| return | +// +---------------------+ +// +// During optimization, however, it can happen that an instruction +// computing an address in "b3" (for example, a loop-invariant GEP) is +// hoisted up out of the loop from block 4 to block 2. [Note that +// this is not an actual load from the stack, only an instruction that +// computes the address to be loaded]. If this happens, there is now a +// path leading from the first use of b3 to the return instruction +// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is +// now larger than if we were computing live intervals strictly based +// on lifetime markers. In the example above, this lengthened lifetime +// would mean that it would appear illegal to overlap b3 with b2. +// +// To deal with this such cases, the code in ::collectMarkers() below +// tries to identify "degenerate" slots -- those slots where on a single +// forward pass through the CFG we encounter a first reference to slot +// K before we hit the slot K lifetime start marker. For such slots, +// we fall back on using the lifetime start marker as the beginning of +// the variable's lifetime. NB: with this implementation, slots can +// appear degenerate in cases where there is unstructured control flow: +// +// if (q) goto mid; +// if (x > 9) { +// int b[100]; +// memcpy(&b[0], ...); +// mid: b[k] = ...; +// abc(&b); +// } +// +// If in RPO ordering chosen to walk the CFG we happen to visit the b[k] +// before visiting the memcpy block (which will contain the lifetime start +// for "b" then it will appear that 'b' has a degenerate lifetime. +// + //===----------------------------------------------------------------------===// // StackColoring Pass //===----------------------------------------------------------------------===// @@ -126,6 +285,17 @@ class StackColoring : public MachineFunctionPass { /// once the coloring is done. SmallVector<MachineInstr*, 8> Markers; + /// Record the FI slots for which we have seen some sort of + /// lifetime marker (either start or end). + BitVector InterestingSlots; + + /// FI slots that need to be handled conservatively (for these + /// slots lifetime-start-on-first-use is disabled). + BitVector ConservativeSlots; + + /// Number of iterations taken during data flow analysis. + unsigned NumIterations; + public: static char ID; StackColoring() : MachineFunctionPass(ID) { @@ -137,6 +307,9 @@ public: private: /// Debug. void dump() const; + void dumpIntervals() const; + void dumpBB(MachineBasicBlock *MBB) const; + void dumpBV(const char *tag, const BitVector &BV) const; /// Removes all of the lifetime marker instructions from the function. /// \returns true if any markers were removed. @@ -153,6 +326,25 @@ private: /// in and out blocks. void calculateLocalLiveness(); + /// Returns TRUE if we're using the first-use-begins-lifetime method for + /// this slot (if FALSE, then the start marker is treated as start of lifetime). + bool applyFirstUse(int Slot) { + if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas) + return false; + if (ConservativeSlots.test(Slot)) + return false; + return true; + } + + /// Examines the specified instruction and returns TRUE if the instruction + /// represents the start or end of an interesting lifetime. The slot or slots + /// starting or ending are added to the vector "slots" and "isStart" is set + /// accordingly. + /// \returns True if inst contains a lifetime start or end + bool isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector<int, 4> &slots, + bool &isStart); + /// Construct the LiveIntervals for the slots. void calculateLiveIntervals(unsigned NumSlots); @@ -170,7 +362,10 @@ private: /// Map entries which point to other entries to their destination. /// A->B->C becomes A->C. - void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); + void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); + + /// Used in collectMarkers + typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap; }; } // end anonymous namespace @@ -179,55 +374,202 @@ char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); AU.addRequired<SlotIndexes>(); AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } -void StackColoring::dump() const { - for (MachineBasicBlock *MBB : depth_first(MF)) { - DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " [" - << MBB->getName() << "]\n"); +#ifndef NDEBUG - LivenessMap::const_iterator BI = BlockLiveness.find(MBB); - assert(BI != BlockLiveness.end() && "Block not found"); - const BlockLifetimeInfo &BlockInfo = BI->second; +LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag, + const BitVector &BV) const { + DEBUG(dbgs() << tag << " : { "); + for (unsigned I = 0, E = BV.size(); I != E; ++I) + DEBUG(dbgs() << BV.test(I) << " "); + DEBUG(dbgs() << "}\n"); +} + +LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const { + LivenessMap::const_iterator BI = BlockLiveness.find(MBB); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; - DEBUG(dbgs()<<"BEGIN : {"); - for (unsigned i=0; i < BlockInfo.Begin.size(); ++i) - DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" "); - DEBUG(dbgs()<<"}\n"); + dumpBV("BEGIN", BlockInfo.Begin); + dumpBV("END", BlockInfo.End); + dumpBV("LIVE_IN", BlockInfo.LiveIn); + dumpBV("LIVE_OUT", BlockInfo.LiveOut); +} - DEBUG(dbgs()<<"END : {"); - for (unsigned i=0; i < BlockInfo.End.size(); ++i) - DEBUG(dbgs()<<BlockInfo.End.test(i)<<" "); +LLVM_DUMP_METHOD void StackColoring::dump() const { + for (MachineBasicBlock *MBB : depth_first(MF)) { + DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " [" + << MBB->getName() << "]\n"); + DEBUG(dumpBB(MBB)); + } +} - DEBUG(dbgs()<<"}\n"); +LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const { + for (unsigned I = 0, E = Intervals.size(); I != E; ++I) { + DEBUG(dbgs() << "Interval[" << I << "]:\n"); + DEBUG(Intervals[I]->dump()); + } +} - DEBUG(dbgs()<<"LIVE_IN: {"); - for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i) - DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" "); +#endif // not NDEBUG + +static inline int getStartOrEndSlot(const MachineInstr &MI) +{ + assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) && + "Expected LIFETIME_START or LIFETIME_END op"); + const MachineOperand &MO = MI.getOperand(0); + int Slot = MO.getIndex(); + if (Slot >= 0) + return Slot; + return -1; +} - DEBUG(dbgs()<<"}\n"); - DEBUG(dbgs()<<"LIVEOUT: {"); - for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i) - DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" "); - DEBUG(dbgs()<<"}\n"); +// +// At the moment the only way to end a variable lifetime is with +// a VARIABLE_LIFETIME op (which can't contain a start). If things +// change and the IR allows for a single inst that both begins +// and ends lifetime(s), this interface will need to be reworked. +// +bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector<int, 4> &slots, + bool &isStart) +{ + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + return false; + if (!InterestingSlots.test(Slot)) + return false; + slots.push_back(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_END) { + isStart = false; + return true; + } + if (! applyFirstUse(Slot)) { + isStart = true; + return true; + } + } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { + if (! MI.isDebugValue()) { + bool found = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot<0) + continue; + if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) { + slots.push_back(Slot); + found = true; + } + } + if (found) { + isStart = true; + return true; + } + } } + return false; } -unsigned StackColoring::collectMarkers(unsigned NumSlot) { +unsigned StackColoring::collectMarkers(unsigned NumSlot) +{ unsigned MarkersFound = 0; - // Scan the function to find all lifetime markers. + BlockBitVecMap SeenStartMap; + InterestingSlots.clear(); + InterestingSlots.resize(NumSlot); + ConservativeSlots.clear(); + ConservativeSlots.resize(NumSlot); + + // number of start and end lifetime ops for each slot + SmallVector<int, 8> NumStartLifetimes(NumSlot, 0); + SmallVector<int, 8> NumEndLifetimes(NumSlot, 0); + + // Step 1: collect markers and populate the "InterestingSlots" + // and "ConservativeSlots" sets. + for (MachineBasicBlock *MBB : depth_first(MF)) { + + // Compute the set of slots for which we've seen a START marker but have + // not yet seen an END marker at this point in the walk (e.g. on entry + // to this bb). + BitVector BetweenStartEnd; + BetweenStartEnd.resize(NumSlot); + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI); + if (I != SeenStartMap.end()) { + BetweenStartEnd |= I->second; + } + } + + // Walk the instructions in the block to look for start/end ops. + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + continue; + InterestingSlots.set(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_START) { + BetweenStartEnd.set(Slot); + NumStartLifetimes[Slot] += 1; + } else { + BetweenStartEnd.reset(Slot); + NumEndLifetimes[Slot] += 1; + } + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << "Found a lifetime "); + DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START + ? "start" + : "end")); + DEBUG(dbgs() << " marker for slot #" << Slot); + DEBUG(dbgs() << " with allocation: " << Allocation->getName() + << "\n"); + } + Markers.push_back(&MI); + MarkersFound += 1; + } else { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot < 0) + continue; + if (! BetweenStartEnd.test(Slot)) { + ConservativeSlots.set(Slot); + } + } + } + } + BitVector &SeenStart = SeenStartMap[MBB]; + SeenStart |= BetweenStartEnd; + } + if (!MarkersFound) { + return 0; + } + + // PR27903: slots with multiple start or end lifetime ops are not + // safe to enable for "lifetime-start-on-first-use". + for (unsigned slot = 0; slot < NumSlot; ++slot) + if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) + ConservativeSlots.set(slot); + DEBUG(dumpBV("Conservative slots", ConservativeSlots)); + + // Step 2: compute begin/end sets for each block + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. @@ -243,35 +585,33 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); + SmallVector<int, 4> slots; for (MachineInstr &MI : *MBB) { - if (MI.getOpcode() != TargetOpcode::LIFETIME_START && - MI.getOpcode() != TargetOpcode::LIFETIME_END) - continue; - - Markers.push_back(&MI); - - bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MO = MI.getOperand(0); - unsigned Slot = MO.getIndex(); - - MarkersFound++; - - const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); - if (Allocation) { - DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< - " with allocation: "<< Allocation->getName()<<"\n"); - } - - if (IsStart) { - BlockInfo.Begin.set(Slot); - } else { - if (BlockInfo.Begin.test(Slot)) { - // Allocas that start and end within a single block are handled - // specially when computing the LiveIntervals to avoid pessimizing - // the liveness propagation. - BlockInfo.Begin.reset(Slot); - } else { + bool isStart = false; + slots.clear(); + if (isLifetimeStartOrEnd(MI, slots, isStart)) { + if (!isStart) { + assert(slots.size() == 1 && "unexpected: MI ends multiple slots"); + int Slot = slots[0]; + if (BlockInfo.Begin.test(Slot)) { + BlockInfo.Begin.reset(Slot); + } BlockInfo.End.set(Slot); + } else { + for (auto Slot : slots) { + DEBUG(dbgs() << "Found a use of slot #" << Slot); + DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index "); + DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); + } + DEBUG(dbgs() << "\n"); + if (BlockInfo.End.test(Slot)) { + BlockInfo.End.reset(Slot); + } + BlockInfo.Begin.set(Slot); + } } } } @@ -282,90 +622,56 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { return MarkersFound; } -void StackColoring::calculateLocalLiveness() { - // Perform a standard reverse dataflow computation to solve for - // global liveness. The BEGIN set here is equivalent to KILL in the standard - // formulation, and END is equivalent to GEN. The result of this computation - // is a map from blocks to bitvectors where the bitvectors represent which - // allocas are live in/out of that block. - SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); - unsigned NumSSMIters = 0; +void StackColoring::calculateLocalLiveness() +{ + unsigned NumIters = 0; bool changed = true; while (changed) { changed = false; - ++NumSSMIters; - - SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; + ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); BlockLifetimeInfo &BlockInfo = BI->second; + // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - BitVector LocalLiveOut; - - // Forward propagation from begins to ends. for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { LivenessMap::const_iterator I = BlockLiveness.find(*PI); assert(I != BlockLiveness.end() && "Predecessor not found"); LocalLiveIn |= I->second.LiveOut; } - LocalLiveIn |= BlockInfo.End; - LocalLiveIn.reset(BlockInfo.Begin); - - // Reverse propagation from ends to begins. - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - LivenessMap::const_iterator I = BlockLiveness.find(*SI); - assert(I != BlockLiveness.end() && "Successor not found"); - LocalLiveOut |= I->second.LiveIn; - } - LocalLiveOut |= BlockInfo.Begin; - LocalLiveOut.reset(BlockInfo.End); - - LocalLiveIn |= LocalLiveOut; - LocalLiveOut |= LocalLiveIn; - // After adopting the live bits, we need to turn-off the bits which - // are de-activated in this block. + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; LocalLiveOut.reset(BlockInfo.End); - LocalLiveIn.reset(BlockInfo.Begin); - - // If we have both BEGIN and END markers in the same basic block then - // we know that the BEGIN marker comes after the END, because we already - // handle the case where the BEGIN comes before the END when collecting - // the markers (and building the BEGIN/END vectore). - // Want to enable the LIVE_IN and LIVE_OUT of slots that have both - // BEGIN and END because it means that the value lives before and after - // this basic block. - BitVector LocalEndBegin = BlockInfo.End; - LocalEndBegin &= BlockInfo.Begin; - LocalLiveIn |= LocalEndBegin; - LocalLiveOut |= LocalEndBegin; + LocalLiveOut |= BlockInfo.Begin; + // Update block LiveIn set, noting whether it has changed. if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - - NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } + // Update block LiveOut set, noting whether it has changed. if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - - NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } - - BBSet = std::move(NextBBSet); }// while changed. + + NumIterations = NumIters; } void StackColoring::calculateLiveIntervals(unsigned NumSlots) { @@ -380,28 +686,22 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Finishes.clear(); Finishes.resize(NumSlots); - // Create the interval for the basic blocks with lifetime markers in them. - for (const MachineInstr *MI : Markers) { - if (MI->getParent() != &MBB) - continue; - - assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || - MI->getOpcode() == TargetOpcode::LIFETIME_END) && - "Invalid Lifetime marker"); - - bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &Mo = MI->getOperand(0); - int Slot = Mo.getIndex(); - assert(Slot >= 0 && "Invalid slot"); + // Create the interval for the basic blocks containing lifetime begin/end. + for (const MachineInstr &MI : MBB) { + SmallVector<int, 4> slots; + bool IsStart = false; + if (!isLifetimeStartOrEnd(MI, slots, IsStart)) + continue; SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); - - if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) - Starts[Slot] = ThisIndex; - } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + for (auto Slot : slots) { + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } } } @@ -417,7 +717,29 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } for (unsigned i = 0; i < NumSlots; ++i) { - assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + // + // When LifetimeStartOnFirstUse is turned on, data flow analysis + // is forward (from starts to ends), not bidirectional. A + // consequence of this is that we can wind up in situations + // where Starts[i] is invalid but Finishes[i] is valid and vice + // versa. Example: + // + // LIFETIME_START x + // if (...) { + // <use of x> + // throw ...; + // } + // LIFETIME_END x + // return 2; + // + // + // Here the slot for "x" will not be live into the block + // containing the "return 2" (since lifetimes start with first + // use, not at the dominating LIFETIME_START marker). + // + if (Starts[i].isValid() && !Finishes[i].isValid()) { + Finishes[i] = Indexes->getMBBEndIdx(&MBB); + } if (!Starts[i].isValid()) continue; @@ -495,10 +817,21 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // upcoming replacement. SP->adjustForColoring(From, To); + // The new alloca might not be valid in a llvm.dbg.declare for this + // variable, so undef out the use to make the verifier happy. + AllocaInst *FromAI = const_cast<AllocaInst *>(From); + if (FromAI->isUsedByMetadata()) + ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType())); + for (auto &Use : FromAI->uses()) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get())) + if (BCI->isUsedByMetadata()) + ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType())); + } + // Note that this will not replace uses in MMOs (which we'll update below), // or anywhere else (which is why we won't delete the original // instruction). - const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst); + FromAI->replaceAllUsesWith(Inst); } // Remap all instructions to the new stack slots. @@ -557,7 +890,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { - SlotIndex Index = Indexes->getInstructionIndex(&I); + SlotIndex Index = Indexes->getInstructionIndex(I); const LiveInterval *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && "Found instruction usage outside of live range."); @@ -616,7 +949,7 @@ void StackColoring::removeInvalidSlotRanges() { // Check that the used slot is inside the calculated lifetime range. // If it is not, warn about it and invalidate the range. LiveInterval *Interval = &*Intervals[Slot]; - SlotIndex Index = Indexes->getInstructionIndex(&I); + SlotIndex Index = Indexes->getInstructionIndex(I); if (Interval->find(Index) == Interval->end()) { Interval->clear(); DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); @@ -643,9 +976,6 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, } bool StackColoring::runOnMachineFunction(MachineFunction &Func) { - if (skipOptnoneFunction(*Func.getFunction())) - return false; - DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); @@ -667,7 +997,6 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return false; SmallVector<int, 8> SortedSlots; - SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); @@ -686,7 +1015,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. - if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || + skipFunction(*Func.getFunction())) { DEBUG(dbgs()<<"Will not try to merge slots.\n"); return removeAllMarkers(); } @@ -700,9 +1030,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Calculate the liveness of each block. calculateLocalLiveness(); + DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); + DEBUG(dump()); // Propagate the liveness information. calculateLiveIntervals(NumSlots); + DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. |