summaryrefslogtreecommitdiff
path: root/lib/CodeGen
diff options
context:
space:
mode:
authorRoman Divacky <rdivacky@FreeBSD.org>2009-11-18 14:58:34 +0000
committerRoman Divacky <rdivacky@FreeBSD.org>2009-11-18 14:58:34 +0000
commit907da171cc911d701da02a5cab898a9c49dd7724 (patch)
tree6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/CodeGen
parent72cc50852bec44580ee7efe1aa2076273008a6ae (diff)
downloadsrc-test2-907da171cc911d701da02a5cab898a9c49dd7724.tar.gz
src-test2-907da171cc911d701da02a5cab898a9c49dd7724.zip
Notes
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp134
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h15
-rw-r--r--lib/CodeGen/AntiDepBreaker.h1
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp153
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.h24
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp1020
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h122
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp89
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h12
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfPrinter.h2
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfWriter.cpp35
-rw-r--r--lib/CodeGen/BranchFolding.cpp689
-rw-r--r--lib/CodeGen/BranchFolding.h77
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp64
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp9
-rw-r--r--lib/CodeGen/LatencyPriorityQueue.cpp15
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp32
-rw-r--r--lib/CodeGen/LiveVariables.cpp49
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp63
-rw-r--r--lib/CodeGen/MachineFunction.cpp36
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp2
-rw-r--r--lib/CodeGen/MachineInstr.cpp85
-rw-r--r--lib/CodeGen/MachineLICM.cpp65
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp2
-rw-r--r--lib/CodeGen/MachineVerifier.cpp23
-rw-r--r--lib/CodeGen/PHIElimination.cpp286
-rw-r--r--lib/CodeGen/PHIElimination.h30
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp15
-rw-r--r--lib/CodeGen/PreAllocSplitting.cpp91
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp9
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp3
-rw-r--r--lib/CodeGen/PseudoSourceValue.cpp29
-rw-r--r--lib/CodeGen/RegAllocLocal.cpp4
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp5
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp7
-rw-r--r--lib/CodeGen/ScheduleDAG.cpp10
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp193
-rw-r--r--lib/CodeGen/SelectionDAG/CallingConvLower.cpp15
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp30
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp93
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.h2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp48
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp21
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp491
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuild.h14
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp34
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp10
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp2
-rw-r--r--lib/CodeGen/SlotIndexes.cpp91
-rw-r--r--lib/CodeGen/Spiller.cpp44
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp44
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp2
-rw-r--r--lib/CodeGen/VirtRegMap.cpp8
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp57
55 files changed, 2644 insertions, 1864 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 86d051c102f8..c37c793b56d0 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -54,10 +54,13 @@ unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
return Node;
}
-void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs)
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
{
for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
- if (GetGroup(Reg) == Group)
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
Regs.push_back(Reg);
}
}
@@ -99,12 +102,28 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi) :
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
State(NULL), SavedState(NULL) {
+ /* Collect a bitset of all registers that are only broken if they
+ are on the critical path. */
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(errs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ errs() << " " << TRI->getName(r));
+ DEBUG(errs() << '\n');
}
AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
@@ -264,16 +283,18 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
}
}
-/// AntiDepPathStep - Return SUnit that SU has an anti-dependence on.
-static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
- std::vector<SDep*>& Edges) {
+/// AntiDepEdges - Return in Edges the anti- and output-
+/// dependencies on Regs in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU,
+ const AntiDepBreaker::AntiDepRegVector& Regs,
+ std::vector<SDep*>& Edges) {
AntiDepBreaker::AntiDepRegSet RegSet;
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
RegSet.insert(Regs[i]);
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
- if (P->getKind() == SDep::Anti) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
unsigned Reg = P->getReg();
if (RegSet.count(Reg) != 0) {
Edges.push_back(&*P);
@@ -285,6 +306,31 @@ static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
assert(RegSet.empty() && "Expected all antidep registers to be found");
}
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SUnit *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag) {
unsigned *KillIndices = State->GetKillIndices();
@@ -499,11 +545,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Collect all registers in the same group as AntiDepReg. These all
- // need to be renamed together if we are to break the
- // anti-dependence.
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
std::vector<unsigned> Regs;
- State->GetGroupRegs(AntiDepGroupIndex, Regs);
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
assert(Regs.size() > 0 && "Empty register group!");
if (Regs.size() == 0)
return false;
@@ -544,9 +590,10 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
// FIXME: for now just handle single register in group case...
- // FIXME: check only regs that have references...
- if (Regs.size() > 1)
+ if (Regs.size() > 1) {
+ DEBUG(errs() << "\tMultiple rename registers in group\n");
return false;
+ }
// Check each possible rename register for SuperReg in round-robin
// order. If that register is available, and the corresponding
@@ -630,12 +677,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Nothing to do if no candidates.
- if (Candidates.empty()) {
- DEBUG(errs() << "\n===== No anti-dependency candidates\n");
- return 0;
- }
-
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
@@ -655,16 +696,37 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, SUnit *> MISUnitMap;
-
- DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
- " anti-dependencies\n");
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
}
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+ // Even if there are no anti-dependencies we still need to go
+ // through the instructions to update Def, Kills, etc.
#ifndef NDEBUG
- {
+ if (Candidates.empty()) {
+ DEBUG(errs() << "\n===== No anti-dependency candidates\n");
+ } else {
+ DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
+ " anti-dependencies\n");
DEBUG(errs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
@@ -691,14 +753,26 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Process the defs in MI...
PrescanInstruction(MI, Count, PassthruRegs);
-
+
+ // Collect the dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
std::vector<SDep*> Edges;
SUnit *PathSU = MISUnitMap[MI];
AntiDepBreaker::CandidateMap::iterator
citer = Candidates.find(PathSU);
if (citer != Candidates.end())
- AntiDepPathStep(PathSU, citer->second, Edges);
-
+ AntiDepEdges(PathSU, citer->second, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
// Ignore KILL instructions (they form a group in ScanInstruction
// but don't cause any anti-dependence breaking themselves)
if (MI->getOpcode() != TargetInstrInfo::KILL) {
@@ -707,7 +781,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
SDep *Edge = Edges[i];
SUnit *NextSU = Edge->getSUnit();
- if (Edge->getKind() != SDep::Anti) continue;
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
unsigned AntiDepReg = Edge->getReg();
DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
@@ -717,6 +792,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(errs() << " (non-allocatable)\n");
continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(errs() << " (not critical-path)\n");
+ continue;
} else if (PassthruRegs.count(AntiDepReg) != 0) {
// If the anti-dep register liveness "passes-thru", then
// don't try to change it. It will be changed along with
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index c5121682bd63..e5c9a7bb3adf 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -85,8 +86,11 @@ namespace llvm {
unsigned GetGroup(unsigned Reg);
// GetGroupRegs - Return a vector of the registers belonging to a
- // group.
- void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs);
+ // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs);
// UnionGroups - Union Reg1's and Reg2's groups to form a new
// group. Return the index of the GroupNode representing the
@@ -114,6 +118,10 @@ namespace llvm {
/// because they may not be safe to break.
const BitVector AllocatableSet;
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
/// State - The state used to identify and rename anti-dependence
/// registers.
AggressiveAntiDepState *State;
@@ -124,7 +132,8 @@ namespace llvm {
AggressiveAntiDepState *SavedState;
public:
- AggressiveAntiDepBreaker(MachineFunction& MFi);
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// GetMaxTrials - As anti-dependencies are broken, additional
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 277508767e1c..b614f687a462 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include <map>
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bb6bd957f063..08e0eae16c35 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -512,7 +514,7 @@ void AsmPrinter::EmitXXStructorList(Constant *List) {
//===----------------------------------------------------------------------===//
/// LEB 128 number encoding.
-/// PrintULEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
/// representing an unsigned leb128 value.
void AsmPrinter::PrintULEB128(unsigned Value) const {
char Buffer[20];
@@ -525,7 +527,7 @@ void AsmPrinter::PrintULEB128(unsigned Value) const {
} while (Value);
}
-/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
/// representing a signed leb128 value.
void AsmPrinter::PrintSLEB128(int Value) const {
int Sign = Value >> (8 * sizeof(Value) - 1);
@@ -546,7 +548,7 @@ void AsmPrinter::PrintSLEB128(int Value) const {
// Emission and print routines
//
-/// PrintHex - Print a value as a hexidecimal value.
+/// PrintHex - Print a value as a hexadecimal value.
///
void AsmPrinter::PrintHex(int Value) const {
char Buffer[20];
@@ -727,7 +729,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
/// Special characters are emitted properly.
/// \literal (Eg. '\t') \endliteral
void AsmPrinter::EmitString(const std::string &String) const {
- EmitString(String.c_str(), String.size());
+ EmitString(String.data(), String.size());
}
void AsmPrinter::EmitString(const char *String, unsigned Size) const {
@@ -1357,32 +1359,31 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
/// instruction's DebugLoc.
void AsmPrinter::processDebugLoc(const MachineInstr *MI,
bool BeforePrintingInsn) {
- if (!MAI || !DW)
+ if (!MAI || !DW || !MAI->doesSupportDebugInformation()
+ || !DW->ShouldEmitDwarfDebug())
return;
DebugLoc DL = MI->getDebugLoc();
- if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
- if (!DL.isUnknown()) {
- DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
- if (BeforePrintingInsn) {
- if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
- unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
- CurDLT.Scope);
- printLabel(L);
- O << '\n';
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeBeginLabels(MI, L);
-#endif
- } else {
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeEndLabels(MI, 0);
-#endif
- }
- }
+ if (DL.isUnknown())
+ return;
+ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
+ if (CurDLT.Scope == 0)
+ return;
+
+ if (BeforePrintingInsn) {
+ if (CurDLT != PrevDLT) {
+ unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ CurDLT.Scope);
+ printLabel(L);
+ DW->BeginScope(MI, L);
PrevDLT = CurDLT;
}
+ } else {
+ // After printing instruction
+ DW->EndScope(MI);
}
}
+
/// printInlineAsm - This method formats and prints the specified machine
/// instruction that is an inline asm.
void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
@@ -1399,6 +1400,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ O << '\t';
+
// If this asmstr is empty, just print the #APP/#NOAPP markers.
// These are useful to see where empty asm's wound up.
if (AsmStr[0] == 0) {
@@ -1636,13 +1639,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
assert(BB->hasName() &&
"Address of anonymous basic block not supported yet!");
- // FIXME: This isn't guaranteed to produce a unique name even if the
- // block and function have a name.
- std::string Mangled =
- Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(),
- /*ForcePrivate=*/true);
+ // This code must use the function name itself, and not the function number,
+ // since it must be possible to generate the label name from within other
+ // functions.
+ std::string FuncName = Mang->getMangledName(F);
+
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
+ << FuncName.size() << '_' << FuncName << '_'
+ << Mang->makeNameProper(BB->getName());
- return OutContext.GetOrCreateSymbol(StringRef(Mangled));
+ return OutContext.GetOrCreateSymbol(Name.str());
}
MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
@@ -1817,21 +1824,80 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
/// EmitComments - Pretty-print comments for instructions
void AsmPrinter::EmitComments(const MachineInstr &MI) const {
- assert(VerboseAsm && !MI.getDebugLoc().isUnknown());
-
- DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+ if (!VerboseAsm)
+ return;
- // Print source line info.
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " SrcLine ";
- if (DLT.Scope) {
- DICompileUnit CU(DLT.Scope);
- if (!CU.isNull())
- O << CU.getFilename() << " ";
+ bool Newline = false;
+
+ if (!MI.getDebugLoc().isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+
+ // Print source line info.
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " SrcLine ";
+ if (DLT.Scope) {
+ DICompileUnit CU(DLT.Scope);
+ if (!CU.isNull())
+ O << CU.getFilename() << " ";
+ }
+ O << DLT.Line;
+ if (DLT.Col != 0)
+ O << ":" << DLT.Col;
+ Newline = true;
+ }
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo =
+ MI.getParent()->getParent()->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Spill";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Spill";
+ Newline = true;
+ }
+ }
+
+ // Check for spill-induced copies
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
+ SrcSubIdx, DstSubIdx)) {
+ if (MI.getAsmPrinterFlag(ReloadReuse)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload Reuse";
+ Newline = true;
+ }
}
- O << DLT.Line;
- if (DLT.Col != 0)
- O << ":" << DLT.Col;
}
/// PrintChildLoopComment - Print comments about child loops within
@@ -1862,8 +1928,7 @@ static void PrintChildLoopComment(formatted_raw_ostream &O,
}
/// EmitComments - Pretty-print comments for basic blocks
-void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const
-{
+void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const {
if (VerboseAsm) {
// Add loop depth information
const MachineLoop *loop = LI->getLoopFor(&MBB);
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 62b51ecd18ac..3e50a15e162d 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -29,7 +29,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
/// Dwarf abbreviation.
- class VISIBILITY_HIDDEN DIEAbbrevData {
+ class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
unsigned Attribute;
@@ -52,7 +52,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
/// information object.
- class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+ class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
unsigned Tag;
@@ -113,7 +113,7 @@ namespace llvm {
class CompileUnit;
class DIEValue;
- class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+ class DIE : public FoldingSetNode {
protected:
/// Abbrev - Buffer for constructing abbreviation.
///
@@ -202,7 +202,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEValue - A debug information entry value.
///
- class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+ class DIEValue : public FoldingSetNode {
public:
enum {
isInteger,
@@ -249,7 +249,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEInteger - An integer value DIE.
///
- class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+ class DIEInteger : public DIEValue {
uint64_t Integer;
public:
explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
@@ -294,7 +294,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEString - A string value DIE.
///
- class VISIBILITY_HIDDEN DIEString : public DIEValue {
+ class DIEString : public DIEValue {
const std::string Str;
public:
explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
@@ -326,7 +326,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDwarfLabel - A Dwarf internal label expression DIE.
//
- class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue {
+ class DIEDwarfLabel : public DIEValue {
const DWLabel Label;
public:
explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
@@ -356,7 +356,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEObjectLabel - A label to an object in code or data.
//
- class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+ class DIEObjectLabel : public DIEValue {
const std::string Label;
public:
explicit DIEObjectLabel(const std::string &L)
@@ -389,7 +389,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIESectionOffset - A section offset DIE.
///
- class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+ class DIESectionOffset : public DIEValue {
const DWLabel Label;
const DWLabel Section;
bool IsEH : 1;
@@ -428,7 +428,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDelta - A simple label difference DIE.
///
- class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+ class DIEDelta : public DIEValue {
const DWLabel LabelHi;
const DWLabel LabelLo;
public:
@@ -462,7 +462,7 @@ namespace llvm {
/// DIEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
- class VISIBILITY_HIDDEN DIEEntry : public DIEValue {
+ class DIEEntry : public DIEValue {
DIE *Entry;
public:
explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
@@ -497,7 +497,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEBlock - A block of values. Primarily used for location expressions.
//
- class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE {
+ class DIEBlock : public DIEValue, public DIE {
unsigned Size; // Size in bytes excluding size header.
public:
DIEBlock()
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1372fc21685f..c62c43545c46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -48,7 +48,7 @@ namespace llvm {
//===----------------------------------------------------------------------===//
/// CompileUnit - This dwarf writer support class manages information associate
/// with a source file.
-class VISIBILITY_HIDDEN CompileUnit {
+class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
@@ -127,61 +127,66 @@ public:
class DbgVariable {
DIVariable Var; // Variable Descriptor.
unsigned FrameIndex; // Variable frame index.
- bool InlinedFnVar; // Variable for an inlined function.
+ DbgVariable *AbstractVar; // Abstract variable for this variable.
+ DIE *TheDIE;
public:
- DbgVariable(DIVariable V, unsigned I, bool IFV)
- : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+ DbgVariable(DIVariable V, unsigned I)
+ : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {}
// Accessors.
- DIVariable getVariable() const { return Var; }
- unsigned getFrameIndex() const { return FrameIndex; }
- bool isInlinedFnVar() const { return InlinedFnVar; }
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+ DbgVariable *getAbstractVariable() const { return AbstractVar; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
};
//===----------------------------------------------------------------------===//
/// DbgScope - This class is used to track scope information.
///
-class DbgConcreteScope;
class DbgScope {
DbgScope *Parent; // Parent to this scope.
- DIDescriptor Desc; // Debug info descriptor for scope.
- // FIXME use WeakVH for Desc.
- WeakVH InlinedAt; // If this scope represents inlined
- // function body then this is the location
- // where this body is inlined.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ WeakVH InlinedAtLocation; // Location at which scope is inlined.
+ bool AbstractScope; // Abstract Scope
unsigned StartLabelID; // Label ID of the beginning of scope.
unsigned EndLabelID; // Label ID of the end of scope.
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
- SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
// Private state for dump()
mutable unsigned IndentLevel;
public:
DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
- : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0),
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ StartLabelID(0), EndLabelID(0),
LastInsn(0), FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
DIDescriptor getDesc() const { return Desc; }
- MDNode *getInlinedAt() const {
- return dyn_cast_or_null<MDNode>(InlinedAt);
+ MDNode *getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(InlinedAtLocation);
}
+ MDNode *getScopeNode() const { return Desc.getNode(); }
unsigned getStartLabelID() const { return StartLabelID; }
unsigned getEndLabelID() const { return EndLabelID; }
SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
- SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
void setStartLabelID(unsigned S) { StartLabelID = S; }
void setEndLabelID(unsigned E) { EndLabelID = E; }
void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
const MachineInstr *getLastInsn() { return LastInsn; }
void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
const MachineInstr *getFirstInsn() { return FirstInsn; }
+
/// AddScope - Add a scope to the scope.
///
void AddScope(DbgScope *S) { Scopes.push_back(S); }
@@ -190,10 +195,6 @@ public:
///
void AddVariable(DbgVariable *V) { Variables.push_back(V); }
- /// AddConcreteInst - Add a concrete instance to the scope.
- ///
- void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
-
void FixInstructionMarkers() {
assert (getFirstInsn() && "First instruction is missing!");
if (getLastInsn())
@@ -218,11 +219,15 @@ public:
void DbgScope::dump() const {
raw_ostream &err = errs();
err.indent(IndentLevel);
- Desc.dump();
+ MDNode *N = Desc.getNode();
+ N->dump();
err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+ if (AbstractScope)
+ err << "Abstract Scope\n";
IndentLevel += 2;
-
+ if (!Scopes.empty())
+ err << "Children ...\n";
for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
if (Scopes[i] != this)
Scopes[i]->dump();
@@ -235,7 +240,7 @@ void DbgScope::dump() const {
/// DbgConcreteScope - This class is used to track a scope that holds concrete
/// instance information.
///
-class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+class DbgConcreteScope : public DbgScope {
CompileUnit *Unit;
DIE *Die; // Debug info for this concrete scope.
public:
@@ -251,8 +256,6 @@ DbgScope::~DbgScope() {
delete Scopes[i];
for (unsigned j = 0, M = Variables.size(); j < M; ++j)
delete Variables[j];
- for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
- delete ConcreteInsts[k];
}
} // end llvm namespace
@@ -262,7 +265,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
ValuesSet(InitValuesSetSize), Values(), StringPool(),
SectionSourceLines(), didInitial(false), shouldEmit(false),
- FunctionDbgScope(0), DebugTimer(0) {
+ CurrentFnDbgScope(0), DebugTimer(0) {
if (TimePassesIsEnabled)
DebugTimer = new Timer("Dwarf Debug Writer",
getDwarfTimerGroup());
@@ -271,11 +274,6 @@ DwarfDebug::~DwarfDebug() {
for (unsigned j = 0, M = Values.size(); j < M; ++j)
delete Values[j];
- for (DenseMap<const MDNode *, DbgScope *>::iterator
- I = AbstractInstanceRootMap.begin(),
- E = AbstractInstanceRootMap.end(); I != E;++I)
- delete I->second;
-
delete DebugTimer;
}
@@ -1097,6 +1095,10 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
/// CreateGlobalVariableDIE - Create new DIE using GV.
DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV) {
+ // If the global variable was optimized out then no need to create debug info entry.
+ if (!GV.getGlobal()) return NULL;
+ if (!GV.getDisplayName()) return NULL;
+
DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
GV.getDisplayName());
@@ -1233,9 +1235,6 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
}
}
- if (!SP.isLocalToUnit() && !IsInlined)
- AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
Slot = SPDie;
@@ -1283,263 +1282,341 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
AddSourceLine(VariableDie, &VD);
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
if (VD.isBlockByrefVariable())
AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
else
AddType(Unit, VariableDie, VD.getType());
// Add variable address.
- if (!DV->isInlinedFnVar()) {
- // Variables for abstract instances of inlined functions don't get a
- // location.
- MachineLocation Location;
- Location.set(RI->getFrameRegister(*MF),
- RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-
-
- if (VD.hasComplexAddress())
- AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ // Variables for abstract instances of inlined functions don't get a
+ // location.
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
return VariableDie;
}
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-///
-DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt) {
- ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N);
- if (VI != DbgScopeMap.end())
- return VI->second;
+/// getUpdatedDbgScope - Find or create DbgScope associated with the instruction.
+/// Initialize scope and update scope hierarchy.
+DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+ MDNode *InlinedAt) {
+ assert (N && "Invalid Scope encoding!");
+ assert (MI && "Missing machine instruction!");
+ bool GetConcreteScope = (MI && InlinedAt);
- DbgScope *Parent = NULL;
+ DbgScope *NScope = NULL;
+
+ if (InlinedAt)
+ NScope = DbgScopeMap.lookup(InlinedAt);
+ else
+ NScope = DbgScopeMap.lookup(N);
+ assert (NScope && "Unable to find working scope!");
+
+ if (NScope->getFirstInsn())
+ return NScope;
- if (InlinedAt) {
+ DbgScope *Parent = NULL;
+ if (GetConcreteScope) {
DILocation IL(InlinedAt);
- assert (!IL.isNull() && "Invalid InlindAt location!");
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(IL.getScope().getNode());
- assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
- Parent = DSI->second;
- } else {
- DIDescriptor Scope(N);
- if (Scope.isCompileUnit()) {
- return NULL;
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(N);
- DIDescriptor ParentDesc = SP.getContext();
- if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
- DIDescriptor ParentDesc = DB.getContext();
- if (!ParentDesc.isNull())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else
- assert (0 && "Unexpected scope info");
- }
-
- DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
+ Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
+ IL.getOrigLocation().getNode());
+ assert (Parent && "Unable to find Parent scope!");
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ } else if (DIDescriptor(N).isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ if (!DB.getContext().isNull()) {
+ Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ }
+ }
+
NScope->setFirstInsn(MI);
- if (Parent)
- Parent->AddScope(NScope);
- else
- // First function is top level function.
- if (!FunctionDbgScope)
- FunctionDbgScope = NScope;
+ if (!Parent && !InlinedAt) {
+ StringRef SPName = DISubprogram(N).getLinkageName();
+ if (SPName == MF->getFunction()->getName())
+ CurrentFnDbgScope = NScope;
+ }
+
+ if (GetConcreteScope) {
+ ConcreteScopes[InlinedAt] = NScope;
+ getOrCreateAbstractScope(N);
+ }
- DbgScopeMap.insert(std::make_pair(N, NScope));
return NScope;
}
+DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+ assert (N && "Invalid Scope encoding!");
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-/// FIXME - Remove this method.
-DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) {
- DbgScope *&Slot = DbgScopeMap[N];
- if (Slot) return Slot;
-
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
DbgScope *Parent = NULL;
- DILexicalBlock Block(N);
- // Don't create a new scope if we already created one for an inlined function.
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(N);
- if (II != AbstractInstanceRootMap.end())
- return LexicalScopeStack.back();
-
- if (!Block.isNull()) {
- DIDescriptor ParentDesc = Block.getContext();
- Parent =
- ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode());
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getOrCreateAbstractScope(ParentDesc.getNode());
}
- Slot = new DbgScope(Parent, DIDescriptor(N));
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
if (Parent)
- Parent->AddScope(Slot);
- else
- // First function is top level function.
- FunctionDbgScope = Slot;
+ Parent->AddScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+static DISubprogram getDISubprogram(MDNode *N) {
- return Slot;
+ DIDescriptor D(N);
+ if (D.isNull())
+ return DISubprogram();
+
+ if (D.isCompileUnit())
+ return DISubprogram();
+
+ if (D.isSubprogram())
+ return DISubprogram(N);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(N).getContext().getNode());
+
+ llvm_unreachable("Unexpected Descriptor!");
}
-/// ConstructDbgScope - Construct the components of a scope.
-///
-void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
- unsigned ParentStartID,
- unsigned ParentEndID,
- DIE *ParentDie, CompileUnit *Unit) {
- // Add variables to scope.
- SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
- if (VariableDie) ParentDie->AddChild(VariableDie);
- }
+DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) {
+
+ DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode);
+ assert (SPDie && "Unable to find subprogram DIE!");
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ if (!DISubprogram(SPNode).isLocalToUnit())
+ AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ // If there are global variables at this scope then add their dies.
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
+ SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
+ MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
+ if (!N) continue;
+ DIGlobalVariable GV(N);
+ if (GV.getContext().getNode() == SPNode) {
+ DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+ if (ScopedGVDie)
+ SPDie->AddChild(ScopedGVDie);
+ }
+ }
+ return SPDie;
+}
+
+DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- // Add concrete instances to scope.
- SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
- ParentScope->getConcreteInsts();
- for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
- DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
- DIE *Die = ConcreteInst->getDie();
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
- unsigned StartID = ConcreteInst->getStartLabelID();
- unsigned EndID = ConcreteInst->getEndLabelID();
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartID ?
+ DWLabel("label", StartID)
+ : DWLabel("func_begin", SubprogramCount));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndID ?
+ DWLabel("label", EndID)
+ : DWLabel("func_end", SubprogramCount));
- // Add the scope bounds.
- if (StartID)
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- if (EndID)
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- ParentDie->AddChild(Die);
- }
+ return ScopeDIE;
+}
- // Add nested scopes.
- SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
- for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
- // Define the Scope debug information entry.
- DbgScope *Scope = Scopes[j];
+DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ assert (StartID && "Invalid starting label for an inlined scope!");
+ assert (EndID && "Invalid end label for an inlined scope!");
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
- unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- // Ignore empty scopes.
- if (StartID == EndID && StartID != 0) continue;
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ assert (OriginDIE && "Unable to find Origin DIE!");
+ AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
- // Do not ignore inlined scopes even if they don't have any variables or
- // scopes.
- if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
- Scope->getConcreteInsts().empty())
- continue;
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
- if (StartID == ParentStartID && EndID == ParentEndID) {
- // Just add stuff to the parent scope.
- ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
- } else {
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+ InlinedSubprogramDIEs.insert(OriginDIE);
- // Add the scope bounds.
- if (StartID)
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
+ // Track the start label for this inlined function.
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP.getNode());
- if (EndID)
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP.getNode());
+ } else
+ I->second.push_back(std::make_pair(StartID, ScopeDIE));
- // Add the scope's contents.
- ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
- ParentDie->AddChild(ScopeDie);
- }
- }
+ StringPool.insert(InlinedSP.getName());
+ StringPool.insert(InlinedSP.getLinkageName());
+ DILocation DL(Scope->getInlinedAt());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
}
-/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
-///
-void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope) {
- // Exit if there is no root scope.
- if (!RootScope) return;
- DIDescriptor Desc = RootScope->getDesc();
- if (Desc.isNull())
- return;
+DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
+ DbgScope *Scope, CompileUnit *Unit) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+ const char *Name = VD.getName();
+ if (!Name)
+ return NULL;
- // Get the subprogram debug information entry.
- DISubprogram SPD(Desc.getNode());
-
- // Get the subprogram die.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- if (!SPDie) {
- ConstructSubprogram(SPD.getNode());
- SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- }
- assert(SPDie && "Missing subprogram descriptor");
-
- if (!AbstractScope) {
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
- }
-
- ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU);
- // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
- SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
- MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
- if (!N) continue;
- DIGlobalVariable GV(N);
- if (GV.getContext().getNode() == RootScope->getDesc().getNode()) {
- DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
- SPDie->AddChild(ScopedGVDie);
- }
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
}
-}
-/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
-///
-void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
- StringMap<DIE*> &Globals = ModuleCU->getGlobals();
- StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName());
- if (GI != Globals.end()) {
- DIE *SPDie = GI->second;
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ DIE *AbsDIE = NULL;
+ if (DbgVariable *AV = DV->getAbstractVariable())
+ AbsDIE = AV->getDIE();
+
+ if (AbsDIE) {
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ (void) OriginSPDIE;
+ assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+ DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
+ assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
+ AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
}
+ else {
+ AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ AddSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (VD.isBlockByrefVariable())
+ AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ else
+ AddType(Unit, VariableDie, VD.getType());
+ }
+
+ // Add variable address.
+ if (!Scope->isAbstractScope()) {
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+
+}
+DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
+ if (!Scope)
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = ConstructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ if (Scope->isAbstractScope())
+ ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode());
+ else
+ ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode());
+ }
+ else {
+ ScopeDIE = ConstructLexicalScopeDIE(Scope);
+ if (!ScopeDIE) return NULL;
+ }
+
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU);
+ if (VariableDIE)
+ ScopeDIE->AddChild(VariableDIE);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DIE *NestedDIE = ConstructScopeDIE(Scopes[j]);
+ if (NestedDIE)
+ ScopeDIE->AddChild(NestedDIE);
+ }
+ return ScopeDIE;
}
/// GetOrCreateSourceID - Look up the source id with the given directory and
@@ -1680,6 +1757,9 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ if (!MAI->doesSupportDebugInformation())
+ return;
+
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
@@ -1710,7 +1790,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
ConstructGlobalVariableDIE(*I);
}
- // Create DIEs for each of the externally visible subprograms.
+ // Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I)
ConstructSubprogram(*I);
@@ -1754,6 +1834,13 @@ void DwarfDebug::EndModule() {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
EmitLabel("text_end", 0);
@@ -1811,55 +1898,102 @@ void DwarfDebug::EndModule() {
DebugTimer->stopTimer();
}
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+ Scope->AddVariable(AbsDbgVariable);
+ AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void DwarfDebug::CollectVariableInfo() {
if (!MMI) return;
+
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
MetadataBase *MB = VI->first;
MDNode *Var = dyn_cast_or_null<MDNode>(MB);
+ if (!Var) continue;
DIVariable DV (Var);
- if (DV.isNull()) continue;
- unsigned VSlot = VI->second;
- DbgScope *Scope = NULL;
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(DV.getContext().getNode());
- if (DSI != DbgScopeMap.end())
- Scope = DSI->second;
- else
- // There is not any instruction assocated with this scope, so get
- // a new scope.
- Scope = getDbgScope(DV.getContext().getNode(),
- NULL /* Not an instruction */,
- NULL /* Not inlined */);
- assert (Scope && "Unable to find variable scope!");
- Scope->AddVariable(new DbgVariable(DV, VSlot, false));
- }
-}
-
-/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
-/// start with this machine instruction.
-void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) {
+ std::pair< unsigned, MDNode *> VP = VI->second;
+ DILocation ScopeLoc(VP.second);
+
+ DbgScope *Scope =
+ ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
+ if (!Scope)
+ Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+ Scope->AddVariable(RegVar);
+ if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc))
+ RegVar->setAbstractVariable(AbsDbgVariable);
+ }
+}
+
+/// BeginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) {
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
if (I == DbgScopeBeginMap.end())
return;
- SmallVector<DbgScope *, 2> &SD = I->second;
- for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ ScopeVector &SD = DbgScopeBeginMap[MI];
+ for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setStartLabelID(Label);
}
-/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
-/// end with this machine instruction.
-void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) {
+/// EndScope - Process end of a scope.
+void DwarfDebug::EndScope(const MachineInstr *MI) {
InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
if (I == DbgScopeEndMap.end())
return;
+
+ unsigned Label = MMI->NextLabelID();
+ Asm->printLabel(Label);
+
SmallVector<DbgScope *, 2> &SD = I->second;
for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setEndLabelID(Label);
+ return;
+}
+
+/// createDbgScope - Create DbgScope for the scope.
+void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock())
+ createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ return;
+ }
+
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ DILocation DL(InlinedAt);
+ createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
}
/// ExtractScopeInformation - Scan machine instructions in this function
@@ -1870,26 +2004,41 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (!DbgScopeMap.empty())
return false;
- // Scan each instruction and create scopes.
+ // Scan each instruction and create scopes. First build working set of scopes.
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown())
- continue;
+ if (DL.isUnknown()) continue;
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
- if (!DLT.Scope)
- continue;
+ if (!DLT.Scope) continue;
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
- DIDescriptor D(DLT.Scope);
- if (!D.isCompileUnit()) {
- DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
- Scope->setLastInsn(MInsn);
- }
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ createDbgScope(DLT.Scope, DLT.InlinedAtLoc);
+ }
+ }
+
+
+ // Build scope hierarchy using working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ DebugLocTuple DLT = MF->getDebugLocTuple(DL);
+ if (!DLT.Scope) continue;
+ // There is no need to create another DIE for compile unit. For all
+ // other scopes, create one DbgScope now. This will be translated
+ // into a scope DIE at the end.
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
+ Scope->setLastInsn(MInsn);
}
}
@@ -1897,8 +2046,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// last instruction as this scope's last instrunction.
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
- DbgScope *S = DI->second;
- if (!S) continue;
+ if (DI->second->isAbstractScope())
+ continue;
assert (DI->second->getFirstInsn() && "Invalid first instruction!");
DI->second->FixInstructionMarkers();
assert (DI->second->getLastInsn() && "Invalid last instruction!");
@@ -1911,7 +2060,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
DbgScope *S = DI->second;
- if (!S) continue;
+ if (S->isAbstractScope())
+ continue;
const MachineInstr *MI = S->getFirstInsn();
assert (MI && "DbgScope does not have first instruction!");
@@ -1919,8 +2069,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeBeginMap.end())
IDI->second.push_back(S);
else
- DbgScopeBeginMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeBeginMap[MI].push_back(S);
MI = S->getLastInsn();
assert (MI && "DbgScope does not have last instruction!");
@@ -1928,31 +2077,12 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeEndMap.end())
IDI->second.push_back(S);
else
- DbgScopeEndMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeEndMap[MI].push_back(S);
}
return !DbgScopeMap.empty();
}
-static DISubprogram getDISubprogram(MDNode *N) {
-
- DIDescriptor D(N);
- if (D.isNull())
- return DISubprogram();
-
- if (D.isCompileUnit())
- return DISubprogram();
-
- if (D.isSubprogram())
- return DISubprogram(N);
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(N).getContext().getNode());
-
- llvm_unreachable("Unexpected Descriptor!");
-}
-
/// BeginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::BeginFunction(MachineFunction *MF) {
@@ -1963,11 +2093,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (!ExtractScopeInformation(MF))
return;
CollectVariableInfo();
-#endif
// Begin accumulating function debug information.
MMI->BeginFunction(MF);
@@ -1977,7 +2105,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
// Emit label for the implicitly defined dbg.stoppoint at the start of the
// function.
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
DebugLoc FDL = MF->getDefaultDebugLoc();
if (!FDL.isUnknown()) {
DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
@@ -1990,15 +2117,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
Asm->printLabel(LabelID);
O << '\n';
}
-#else
- DebugLoc FDL = MF->getDefaultDebugLoc();
- if (!FDL.isUnknown()) {
- DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
- unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
- Asm->printLabel(LabelID);
- O << '\n';
- }
-#endif
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
@@ -2011,10 +2129,9 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (DbgScopeMap.empty())
return;
-#endif
+
// Define end label for subprogram.
EmitLabel("func_end", SubprogramCount);
@@ -2029,41 +2146,24 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
Lines.begin(), Lines.end());
}
- // Construct the DbgScope for abstract instances.
- for (SmallVector<DbgScope *, 32>::iterator
- I = AbstractInstanceRootList.begin(),
- E = AbstractInstanceRootList.end(); I != E; ++I)
- ConstructFunctionDbgScope(*I);
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ ConstructScopeDIE(*AI);
- // Construct scopes for subprogram.
- if (FunctionDbgScope)
- ConstructFunctionDbgScope(FunctionDbgScope);
- else
- // FIXME: This is wrong. We are essentially getting past a problem with
- // debug information not being able to handle unreachable blocks that have
- // debug information in them. In particular, those unreachable blocks that
- // have "region end" info in them. That situation results in the "root
- // scope" not being created. If that's the case, then emit a "default"
- // scope, i.e., one that encompasses the whole function. This isn't
- // desirable. And a better way of handling this (and all of the debugging
- // information) needs to be explored.
- ConstructDefaultDbgScope(MF);
+ ConstructScopeDIE(CurrentFnDbgScope);
DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
MMI->getFrameMoves()));
// Clear debug info
- if (FunctionDbgScope) {
- delete FunctionDbgScope;
+ if (CurrentFnDbgScope) {
+ CurrentFnDbgScope = NULL;
DbgScopeMap.clear();
DbgScopeBeginMap.clear();
DbgScopeEndMap.clear();
- DbgAbstractScopeMap.clear();
- DbgConcreteScopeMap.clear();
- FunctionDbgScope = NULL;
- LexicalScopeStack.clear();
- AbstractInstanceRootList.clear();
- AbstractInstanceRootMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
}
Lines.clear();
@@ -2130,201 +2230,6 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
return SrcId;
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfDebug::RecordRegionStart(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
- LexicalScopeStack.push_back(Scope);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfDebug::RecordRegionEnd(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- Scope->setEndLabelID(ID);
- // FIXME : region.end() may not be in the last basic block.
- // For now, do not pop last lexical scope because next basic
- // block may start new inlined function's body.
- unsigned LSSize = LexicalScopeStack.size();
- if (LSSize != 0 && LSSize != 1)
- LexicalScopeStack.pop_back();
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordVariable - Indicate the declaration of a local variable.
-void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DIDescriptor Desc(N);
- DbgScope *Scope = NULL;
- bool InlinedFnVar = false;
-
- if (Desc.getTag() == dwarf::DW_TAG_variable)
- Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode());
- else {
- bool InlinedVar = false;
- MDNode *Context = DIVariable(N).getContext().getNode();
- DISubprogram SP(Context);
- if (!SP.isNull()) {
- // SP is inserted into DbgAbstractScopeMap when inlined function
- // start was recorded by RecordInlineFnStart.
- ValueMap<MDNode *, DbgScope *>::iterator
- I = DbgAbstractScopeMap.find(SP.getNode());
- if (I != DbgAbstractScopeMap.end()) {
- InlinedVar = true;
- Scope = I->second;
- }
- }
- if (!InlinedVar)
- Scope = getOrCreateScope(Context);
- }
-
- assert(Scope && "Unable to find the variable's scope");
- DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar);
- Scope->AddVariable(DV);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-}
-
-//// RecordInlinedFnStart - Indicate the start of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- unsigned LabelID = MMI->NextLabelID();
-
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return LabelID;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(Node);
-
- if (II == AbstractInstanceRootMap.end()) {
- // Create an abstract instance entry for this inlined function if it doesn't
- // already exist.
- DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node));
-
- // Get the compile unit context.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(Node);
- if (!SPDie)
- SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true);
-
- // Mark as being inlined. This makes this subprogram entry an abstract
- // instance root.
- // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
- // that it's defined. That probably won't change in the future. However,
- // this could be more elegant.
- AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
-
- // Keep track of the abstract scope for this function.
- DbgAbstractScopeMap[Node] = Scope;
-
- AbstractInstanceRootMap[Node] = Scope;
- AbstractInstanceRootList.push_back(Scope);
- }
-
- // Create a concrete inlined instance for this inlined function.
- DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node));
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- ScopeDie->setAbstractCompileUnit(ModuleCU);
-
- DIE *Origin = ModuleCU->getDieMapSlotFor(Node);
- AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, Origin);
- AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
- AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
- AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
-
- ConcreteScope->setDie(ScopeDie);
- ConcreteScope->setStartLabelID(LabelID);
- MMI->RecordUsedDbgLabel(LabelID);
-
- LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
-
- // Keep track of the concrete scope that's inlined into this function.
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- SI = DbgConcreteScopeMap.find(Node);
-
- if (SI == DbgConcreteScopeMap.end())
- DbgConcreteScopeMap[Node].push_back(ConcreteScope);
- else
- SI->second.push_back(ConcreteScope);
-
- // Track the start label for this inlined function.
- ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.find(Node);
-
- if (I == InlineInfo.end())
- InlineInfo[Node].push_back(LabelID);
- else
- I->second.push_back(LabelID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return LabelID;
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return 0;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- I = DbgConcreteScopeMap.find(Node);
-
- if (I == DbgConcreteScopeMap.end()) {
- // FIXME: Can this situation actually happen? And if so, should it?
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return 0;
- }
-
- SmallVector<DbgScope *, 8> &Scopes = I->second;
- if (Scopes.empty()) {
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- return 0;
- }
-
- DbgScope *Scope = Scopes.back(); Scopes.pop_back();
- unsigned ID = MMI->NextLabelID();
- MMI->RecordUsedDbgLabel(ID);
- Scope->setEndLabelID(ID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
@@ -2470,10 +2375,7 @@ void DwarfDebug::EmitDIE(DIE *Die) {
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
- unsigned Addr =
- CompileUnitOffsets[Die->getAbstractCompileUnit()] +
- Origin->getOffset();
-
+ unsigned Addr = Origin->getOffset();
Asm->EmitInt32(Addr);
break;
}
@@ -3002,10 +2904,14 @@ void DwarfDebug::EmitDebugInlineInfo() {
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
- MDNode *Node = I->first;
- SmallVector<unsigned, 4> &Labels = I->second;
+ for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+ MDNode *Node = *I;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
DISubprogram SP(Node);
const char *LName = SP.getLinkageName();
const char *Name = SP.getName();
@@ -3019,17 +2925,21 @@ void DwarfDebug::EmitDebugInlineInfo() {
// __asm__ attribute.
if (LName[0] == 1)
LName = &LName[1];
- Asm->EmitString(LName);
+// Asm->EmitString(LName);
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(LName), false, true);
+
}
Asm->EOL("MIPS linkage name");
-
- Asm->EmitString(Name); Asm->EOL("Function name");
-
+// Asm->EmitString(Name);
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(Name), false, true);
+ Asm->EOL("Function name");
Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
- for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
- DIE *SP = ModuleCU->getDieMapSlotFor(Node);
+ DIE *SP = LI->second;
Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
if (TD->getPointerSize() == sizeof(int32_t))
@@ -3037,7 +2947,7 @@ void DwarfDebug::EmitDebugInlineInfo() {
else
O << MAI->getData64bitsDirective();
- PrintLabelName("label", *LI); Asm->EOL("low_pc");
+ PrintLabelName("label", LI->first); Asm->EOL("low_pc");
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ddb0a15ed78d..646de8f36e14 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,9 +30,9 @@
namespace llvm {
class CompileUnit;
-class DbgVariable;
-class DbgScope;
class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
class MCAsmInfo;
@@ -41,7 +41,7 @@ class Timer;
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
///
-class VISIBILITY_HIDDEN SrcLineInfo {
+class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
unsigned SourceID; // Source ID number.
@@ -57,7 +57,7 @@ public:
unsigned getLabelID() const { return LabelID; }
};
-class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+class DwarfDebug : public Dwarf {
//===--------------------------------------------------------------------===//
// Attributes used to construct specific Dwarf sections.
//
@@ -134,52 +134,52 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
bool shouldEmit;
- // FunctionDbgScope - Top level scope for the current function.
+ // CurrentFnDbgScope - Top level scope for the current function.
//
- DbgScope *FunctionDbgScope;
+ DbgScope *CurrentFnDbgScope;
/// DbgScopeMap - Tracks the scopes in the current function.
+ ///
ValueMap<MDNode *, DbgScope *> DbgScopeMap;
+ /// ConcreteScopes - Tracks the concrete scopes in the current function.
+ /// These scopes are also included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> ConcreteScopes;
+
+ /// AbstractScopes - Tracks the abstract scopes in a module. These scopes are
+ /// not included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> AbstractScopes;
+ SmallVector<DbgScope *, 4>AbstractScopesList;
+
+ /// AbstractVariables - Collection of abstract variables.
+ ValueMap<MDNode *, DbgVariable *> AbstractVariables;
+
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ /// AbstractSubprogramDIEs - Collection of abstract subprogram DIEs.
+ SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
+
/// ScopedGVs - Tracks global variables that are not at file scope.
/// For example void f() { static int b = 42; }
SmallVector<WeakVH, 4> ScopedGVs;
- typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> >
+ typedef SmallVector<DbgScope *, 2> ScopeVector;
+ typedef DenseMap<const MachineInstr *, ScopeVector>
InsnToDbgScopeMapTy;
- /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts.
+ /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts.
InsnToDbgScopeMapTy DbgScopeBeginMap;
/// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends.
InsnToDbgScopeMapTy DbgScopeEndMap;
- /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
- /// function.
- ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
-
- /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
- /// function.
- ValueMap<MDNode *,
- SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
-
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
-
- /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap;
-
- /// AbstractInstanceRootList - List of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- SmallVector<DbgScope *, 32> AbstractInstanceRootList;
-
- /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
- /// scope.
- SmallVector<DbgScope *, 16> LexicalScopeStack;
+ typedef std::pair<unsigned, DIE *> InlineInfoLabels;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<MDNode *, 4> InlinedSPNodes;
/// CompileUnitOffsets - A vector of the offsets of the compile units. This is
/// used when calculating the "origin" of a concrete instance of an inlined
@@ -361,10 +361,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
- /// getDbgScope - Returns the scope associated with the given descriptor.
- ///
- DbgScope *getOrCreateScope(MDNode *N);
- DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+ /// getUpdatedDbgScope - Find or create DbgScope associated with
+ /// the instruction. Initialize scope and update scope hierarchy.
+ DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+
+ /// createDbgScope - Create DbgScope for the scope.
+ void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+
+ DbgScope *getOrCreateAbstractScope(MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &Loc);
+
+ DIE *UpdateSubprogramScopeDIE(MDNode *SPNode);
+ DIE *ConstructLexicalScopeDIE(DbgScope *Scope);
+ DIE *ConstructScopeDIE(DbgScope *Scope);
+ DIE *ConstructInlinedScopeDIE(DbgScope *Scope);
+ DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
/// ConstructDbgScope - Construct the components of a scope.
///
@@ -372,15 +386,6 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
unsigned ParentStartID, unsigned ParentEndID,
DIE *ParentDie, CompileUnit *Unit);
- /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
- ///
- void ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope = false);
-
- /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
- ///
- void ConstructDefaultDbgScope(MachineFunction *MF);
-
/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
/// tools to recognize the object file contains Dwarf information.
void EmitInitial();
@@ -535,22 +540,6 @@ public:
unsigned getOrCreateSourceID(const std::string &DirName,
const std::string &FileName);
- /// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(MDNode *N);
-
- /// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(MDNode *N);
-
- /// RecordVariable - Indicate the declaration of a local variable.
- void RecordVariable(MDNode *N, unsigned FrameIndex);
-
- //// RecordInlinedFnStart - Indicate the start of inlined subroutine.
- unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col);
-
- /// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
- unsigned RecordInlinedFnEnd(DISubprogram &SP);
-
/// ExtractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true, if atleast one scope was found.
bool ExtractScopeInformation(MachineFunction *MF);
@@ -558,15 +547,16 @@ public:
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void CollectVariableInfo();
- /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
- /// start with this machine instruction.
- void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label);
-
/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
/// end with this machine instruction.
void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
-};
+ /// BeginScope - Process beginning of a scope starting at Label.
+ void BeginScope(const MachineInstr *MI, unsigned Label);
+
+ /// EndScope - Process end of a scope.
+ void EndScope(const MachineInstr *MI);
+};
} // End of namespace llvm
#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 6c03b559b77b..1c8b8f464720 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -74,6 +74,25 @@ unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
return 0;
}
+/// CreateLabelDiff - Emit a private label at the current position (named from
+/// LabelName, the function number and Index) and return ExprRef minus that
+/// label. This is equivalent to "foo - .", with "." emitted as a real label.
+const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
+ const char *LabelName,
+ unsigned Index) {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << LabelName << Asm->getFunctionNumber()
+ << "_" << Index;
+ MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ Asm->OutStreamer.EmitLabel(DotSym);
+
+ return MCBinaryExpr::CreateSub(ExprRef,
+ MCSymbolRefExpr::Create(DotSym,
+ Asm->OutContext),
+ Asm->OutContext);
+}
+
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
/// is shared among many Frame Description Entries. There is at least one CIE
/// in every non-empty .debug_frame section.
@@ -176,24 +195,10 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// If there is a personality, we need to indicate the function's location.
if (PersonalityRef) {
- // If the reference to the personality function symbol is not already
- // pc-relative, then we need to subtract our current address from it. Do
- // this by emitting a label and subtracting it from the expression we
- // already have. This is equivalent to emitting "foo - .", but we have to
- // emit the label for "." directly.
- if (!IsPersonalityPCRel) {
- SmallString<64> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
- << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index;
- MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
- Asm->OutStreamer.EmitLabel(DotSym);
-
- PersonalityRef =
- MCBinaryExpr::CreateSub(PersonalityRef,
- MCSymbolRefExpr::Create(DotSym,Asm->OutContext),
- Asm->OutContext);
- }
-
+ if (!IsPersonalityPCRel)
+ PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr",
+ Index);
+
O << MAI->getData32bitsDirective();
PersonalityRef->print(O, MAI);
Asm->EOL("Personality");
@@ -232,11 +237,16 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// corresponding function is static, this should not be externally visible.
if (!TheFunc->hasLocalLinkage())
if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
- O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+ O << GlobalEHDirective << EHFrameInfo.FnName << '\n';
// If corresponding function is weak definition, this should be too.
if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
- O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+ O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << '\n';
+
+ // If corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (const char *HiddenDirective = MAI->getHiddenDirective())
+ O << HiddenDirective << EHFrameInfo.FnName << '\n' ;
// If there are no calls then you can't unwind. This may mean we can omit the
// EH Frame, but some environments do not handle weak absolute symbols. If
@@ -457,6 +467,39 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
return SizeActions;
}
+/// CallToNoUnwindFunction - Return `true' only if exactly one Function operand
+/// is found on call MI and it is marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false; // Result; stays false if no Function is seen.
+ bool SawFunc = false; // Set once a Function operand has been seen.
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (MO.isGlobal()) {
+ if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say whether `F' is the callee
+ // or a parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+ }
+ }
+
+ return MarkedNoUnwind;
+}
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
/// has a try-range containing the call, a non-zero landing pad, and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -485,7 +528,9 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- SawPotentiallyThrowing |= MI->getDesc().isCall();
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+
continue;
}
@@ -497,7 +542,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
SawPotentiallyThrowing = false;
// Beginning of a new try-range?
- RangeMapType::iterator L = PadMap.find(BeginLabel);
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
if (L == PadMap.end())
// Nope, it was just some random label.
continue;
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f6f50255f2e7..aff1665e9b97 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -25,13 +25,14 @@ namespace llvm {
struct LandingPadInfo;
class MachineModuleInfo;
class MCAsmInfo;
+class MCExpr;
class Timer;
class raw_ostream;
//===----------------------------------------------------------------------===//
/// DwarfException - Emits Dwarf exception handling directives.
///
-class VISIBILITY_HIDDEN DwarfException : public Dwarf {
+class DwarfException : public Dwarf {
struct FunctionEHFrameInfo {
std::string FnName;
unsigned Number;
@@ -155,6 +156,10 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions);
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an
/// invoke has a try-range containing the call, a non-zero landing pad and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -168,6 +173,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
const SmallVectorImpl<unsigned> &FirstActions);
void EmitExceptionTable();
+ /// CreateLabelDiff - Emit a label and subtract it from the expression we
+ /// already have. This is equivalent to emitting "foo - .", but we have to
+ /// emit the label for "." directly.
+ const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
+ unsigned Index);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 33ebb3bd0eb5..dedd695392e3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -29,7 +29,7 @@ namespace llvm {
class TargetData;
class TargetRegisterInfo;
- class VISIBILITY_HIDDEN Dwarf {
+ class Dwarf {
protected:
//===-------------------------------------------------------------==---===//
// Core attributes used by the DWARF printer.
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 0638d3568549..63ae65368058 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -81,47 +81,20 @@ unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
return DD->RecordSourceLine(Line, Col, Scope);
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfWriter::RecordRegionStart(MDNode *N) {
- return DD->RecordRegionStart(N);
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfWriter::RecordRegionEnd(MDNode *N) {
- return DD->RecordRegionEnd(N);
-}
-
/// getRecordSourceLineCount - Count source lines.
unsigned DwarfWriter::getRecordSourceLineCount() {
return DD->getRecordSourceLineCount();
}
-/// RecordVariable - Indicate the declaration of a local variable.
-///
-void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) {
- DD->RecordVariable(N, FrameIndex);
-}
-
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
bool DwarfWriter::ShouldEmitDwarfDebug() const {
return DD && DD->ShouldEmitDwarfDebug();
}
-//// RecordInlinedFnStart
-unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- return DD->RecordInlinedFnStart(SP, CU, Line, Col);
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
- return DD->RecordInlinedFnEnd(SP);
-}
-
-void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeEndLabels(MI, L);
+void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
+ DD->BeginScope(MI, L);
}
-void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeBeginLabels(MI, L);
+void DwarfWriter::EndScope(const MachineInstr *MI) {
+ DD->EndScope(MI);
}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index baea9642d4fd..94bfb7204ba2 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -40,18 +41,38 @@ using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
-static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
// Throttle for huge numbers of predecessors (compile speed problems)
static cl::opt<unsigned>
-TailMergeThreshold("tail-merge-threshold",
+TailMergeThreshold("tail-merge-threshold",
cl::desc("Max number of predecessors to consider tail merging"),
cl::init(150), cl::Hidden);
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - MachineFunctionPass wrapper around BranchFolder.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID; // Its address is handed to the MachineFunctionPass ctor.
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+} // end anonymous namespace
char BranchFolderPass::ID = 0;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
return new BranchFolderPass(DefaultEnableTailMerge);
}
@@ -63,7 +84,6 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
-
BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
@@ -77,12 +97,12 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(errs() << "\nRemoving MBB: " << *MBB);
-
+
MachineFunction *MF = MBB->getParent();
// drop all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
-
+
// If there are any labels in the basic block, unregister them from
// MachineModuleInfo.
if (MMI && !MBB->empty()) {
@@ -93,7 +113,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MMI->InvalidateLabel(I->getOperand(0).getImm());
}
}
-
+
// Remove the block.
MF->erase(MBB);
}
@@ -182,6 +202,11 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange |= MadeChangeThisIteration;
}
+ // Do tail duplication once after tail merging is done. Otherwise it is
+ // tough to avoid situations where tail duplication and tail merging undo
+ // each other's transformations ad infinitum.
+ MadeChange |= TailDuplicateBlocks(MF);
+
// See if any jump tables have become mergable or dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
@@ -190,7 +215,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Figure out how these jump tables should be merged.
std::vector<unsigned> JTMapping;
JTMapping.reserve(JTs.size());
-
+
// We always keep the 0th jump table.
JTMapping.push_back(0);
@@ -202,7 +227,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
else
JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
}
-
+
// If a jump table was merge with another one, walk the function rewriting
// references to jump tables to reference the new JT ID's. Keep track of
// whether we see a jump table idx, if not, we can delete the JT.
@@ -221,7 +246,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
JTIsLive.set(NewIdx);
}
}
-
+
// Finally, remove dead jump tables. This happens either because the
// indirect jump was unreachable (and thus deleted) or because the jump
// table was merged with some other one.
@@ -245,7 +270,7 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
unsigned Hash = MI->getOpcode();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
-
+
// Merge in bits from the operand if easy.
unsigned OperandHash = 0;
switch (Op.getType()) {
@@ -267,31 +292,30 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
break;
default: break;
}
-
+
Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
}
return Hash;
}
/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
-/// with no successors, we hash two instructions, because cross-jumping
-/// only saves code when at least two instructions are removed (since a
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
/// branch must be inserted). For blocks with a successor, one of the
/// two blocks to be tail-merged will end with a branch already, so
/// it is profitable to cross-jump even for one instruction.
-
static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
unsigned minCommonTailLength) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
return 0; // Empty MBB.
-
+
--I;
unsigned Hash = HashMachineInstr(I);
-
+
if (I == MBB->begin() || minCommonTailLength == 1)
return Hash; // Single instr MBB.
-
+
--I;
// Hash in the second-to-last instruction.
Hash ^= HashMachineInstr(I) << 2;
@@ -307,11 +331,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator &I2) {
I1 = MBB1->end();
I2 = MBB2->end();
-
+
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
- if (!I1->isIdenticalTo(I2) ||
+ if (!I1->isIdenticalTo(I2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
@@ -332,11 +356,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
MachineBasicBlock *OldBB = OldInst->getParent();
-
+
// Remove all the old successors of OldBB from the CFG.
while (!OldBB->succ_empty())
OldBB->removeSuccessor(OldBB->succ_begin());
-
+
// Remove all the dead instructions from the end of OldBB.
OldBB->erase(OldInst, OldBB->end());
@@ -361,10 +385,10 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Move all the successors of this block to the specified block.
NewMBB->transferSuccessors(&CurMBB);
-
+
// Add an edge from CurMBB to NewMBB for the fall-through.
CurMBB.addSuccessor(NewMBB);
-
+
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
@@ -375,7 +399,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
RS->forward(prior(CurMBB.end()));
BitVector RegsLiveAtExit(TRI->getNumRegs());
RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++)
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
if (RegsLiveAtExit[i])
NewMBB->addLiveIn(i);
}
@@ -404,8 +428,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
// branches temporarily for tail merging). In the case where CurMBB ends
// with a conditional branch to the next block, optimize by reversing the
// test and conditionally branching to SuccMBB instead.
-
-static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
@@ -425,24 +448,43 @@ static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
}
-static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
- const std::pair<unsigned,MachineBasicBlock*> &q) {
- if (p.first < q.first)
- return true;
- else if (p.first > q.first)
- return false;
- else if (p.second->getNumber() < q.second->getNumber())
- return true;
- else if (p.second->getNumber() > q.second->getNumber())
- return false;
- else {
- // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
- // an object with itself.
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
#ifndef _GLIBCXX_DEBUG
- llvm_unreachable("Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#endif
- return false;
+ return false;
+ }
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
}
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
}
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
@@ -454,21 +496,52 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
unsigned minCommonTailLength,
unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
- MachineBasicBlock::iterator &I2) {
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
MachineFunction *MF = MBB1->getParent();
- if (CommonTailLen >= minCommonTailLength)
- return true;
-
if (CommonTailLen == 0)
return false;
- // If we are optimizing for code size, 1 instruction in common is enough if
- // we don't have to split a block. At worst we will be replacing a
- // fallthrough into the common tail with a branch, which at worst breaks
- // even with falling through into the duplicated common tail.
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
@@ -476,40 +549,44 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
}
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
+/// hash CurHash (guaranteed to match the last element). Build the vector
/// SameTails of all those that have the (same) largest number of instructions
/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
/// instruction where the matching code sequence begins.
/// Order of elements in SameTails is the reverse of the order in which
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
-unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
- unsigned minCommonTailLength) {
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
SameTails.clear();
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
MPIterator HighestMPIter = prior(MergePotentials.end());
for (MPIterator CurMPIter = prior(MergePotentials.end()),
- B = MergePotentials.begin();
- CurMPIter!=B && CurMPIter->first==CurHash;
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
--CurMPIter) {
- for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
unsigned CommonTailLen;
- if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
- CommonTailLen, TrialBBI1, TrialBBI2)) {
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
HighestMPIter = CurMPIter;
- SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
}
if (HighestMPIter == CurMPIter &&
CommonTailLen == maxCommonTailLength)
- SameTails.push_back(std::make_pair(I, TrialBBI2));
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
}
- if (I==B)
+ if (I == B)
break;
}
}
@@ -518,21 +595,21 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
/// MergePotentials, restoring branches at ends of blocks as appropriate.
-void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
- MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB) {
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
MPIterator CurMPIter, B;
- for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
- CurMPIter->first==CurHash;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
--CurMPIter) {
// Put the unconditional branch back, if we need one.
- MachineBasicBlock *CurMBB = CurMPIter->second;
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
- if (CurMPIter==B)
+ if (CurMPIter == B)
break;
}
- if (CurMPIter->first!=CurHash)
+ if (CurMPIter->getHash() != CurHash)
CurMPIter++;
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
@@ -541,35 +618,37 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
/// only of the common tail. Create a block that does by splitting one.
unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength) {
- unsigned i, commonTailIndex;
+ unsigned commonTailIndex = 0;
unsigned TimeEstimate = ~0U;
- for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
// Use PredBB if possible; that doesn't require a new branch.
- if (SameTails[i].first->second==PredBB) {
+ if (SameTails[i].getBlock() == PredBB) {
commonTailIndex = i;
break;
}
// Otherwise, make a (fairly bogus) choice based on estimate of
// how long it will take the various blocks to execute.
- unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
- SameTails[i].second);
- if (t<=TimeEstimate) {
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
TimeEstimate = t;
commonTailIndex = i;
}
}
- MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size "
+ DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
<< maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
- SameTails[commonTailIndex].first->second = newMBB;
- SameTails[commonTailIndex].second = newMBB->begin();
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
// If we split PredBB, newMBB is the new predecessor.
- if (PredBB==MBB)
+ if (PredBB == MBB)
PredBB = newMBB;
return commonTailIndex;
@@ -579,35 +658,49 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
// are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).
+// Succ added (but the predecessor/successor lists need no adjustment).
// The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
-bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
- MachineBasicBlock* PredBB) {
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
bool MadeChange = false;
- // It doesn't make sense to save a single instruction since tail merging
- // will add a jump.
- // FIXME: Ask the target to provide the threshold?
- unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
-
- DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n');
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(errs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ errs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ errs() << "\n";
+ if (SuccBB) {
+ errs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ errs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ errs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare);
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
- unsigned CurHash = prior(MergePotentials.end())->first;
-
+ unsigned CurHash = MergePotentials.back().getHash();
+
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
- unsigned maxCommonTailLength = ComputeSameTails(CurHash,
- minCommonTailLength);
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
- // If we didn't find any pair that has at least minCommonTailLength
+ // If we didn't find any pair that has at least minCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -618,36 +711,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
- getParent()->begin();
- unsigned int commonTailIndex, i;
- for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
- MachineBasicBlock *MBB = SameTails[i].first->second;
- if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
- commonTailIndex = i;
- if (MBB==PredBB)
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
}
}
- if (commonTailIndex==SameTails.size()) {
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
- commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
}
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for ");
- for (unsigned int i=0; i<SameTails.size(); ++i) {
- if (commonTailIndex==i)
+ DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
continue;
- DEBUG(errs() << SameTails[i].first->second->getNumber() << ",");
+ DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
- ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
- MergePotentials.erase(SameTails[i].first);
+ MergePotentials.erase(SameTails[i].getMPIter());
}
DEBUG(errs() << "\n");
// We leave commonTailIndex in the worklist in case there are other blocks
@@ -660,26 +775,27 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return false;
-
+
bool MadeChange = false;
// First find blocks with no successors.
MergePotentials.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->succ_empty())
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I));
}
+
// See if we can do any tail merging on those.
if (MergePotentials.size() < TailMergeThreshold &&
MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(NULL, NULL);
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
// to IBB, and
// (2) alter conditional branches so they branch to the other block
- // not IBB; this may require adding back an unconditional branch to IBB
+ // not IBB; this may require adding back an unconditional branch to IBB
// later, where there wasn't one coming in. E.g.
// Bcc IBB
// fallthrough to QBB
@@ -693,18 +809,19 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// a compile-time infinite loop repeatedly doing and undoing the same
// transformations.)
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
P != E2; ++P) {
- MachineBasicBlock* PBB = *P;
+ MachineBasicBlock *PBB = *P;
// Skip blocks that loop to themselves, can't tail merge these.
- if (PBB==IBB)
+ if (PBB == IBB)
continue;
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
@@ -715,7 +832,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Failing case: IBB is the target of a cbr, and
// we cannot reverse the branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
- if (!Cond.empty() && TBB==IBB) {
+ if (!Cond.empty() && TBB == IBB) {
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
@@ -727,20 +844,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// to have a bit in the edge so we didn't have to do all this.
if (IBB->isLandingPad()) {
MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock* PredNextBB = NULL;
- if (IP!=MF.end())
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
PredNextBB = IP;
- if (TBB==NULL) {
- if (IBB!=PredNextBB) // fallthrough
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
continue;
} else if (FBB) {
- if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
continue;
} else if (Cond.empty()) {
- if (TBB!=IBB) // ubr
+ if (TBB != IBB) // ubr
continue;
} else {
- if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ if (TBB != IBB && IBB != PredNextBB) // cbr
continue;
}
}
@@ -749,19 +866,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
}
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U),
+ *P));
}
}
- if (MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(I, PredBB);
- // Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryMergeBlocks
- if (MergePotentials.size()==1 &&
- MergePotentials.begin()->second != PredBB)
- FixTail(MergePotentials.begin()->second, I, TII);
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
}
return MadeChange;
@@ -773,14 +891,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
bool MadeChange = false;
-
+
// Make sure blocks are numbered in order
MF.RenumberBlocks();
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
-
+
// If it is dead, remove it.
if (MBB->pred_empty()) {
RemoveDeadBlock(MBB);
@@ -801,7 +919,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
///
bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
bool BranchUnAnalyzable,
- MachineBasicBlock *TBB,
+ MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) {
MachineFunction::iterator Fallthrough = CurBB;
@@ -809,14 +927,22 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == CurBB->getParent()->end())
return false;
-
+
// If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
if (!CurBB->isSuccessor(Fallthrough))
return false;
-
- // If we couldn't analyze the branch, assume it could fall through.
- if (BranchUnAnalyzable) return true;
-
+
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ if (BranchUnAnalyzable)
+ return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
+ CurBB->back().getDesc().isPredicable();
+
// If there is no branch, control always falls through.
if (TBB == 0) return true;
@@ -825,11 +951,11 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
return true;
-
- // If it's an unconditional branch to some block not the fall through, it
+
+ // If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
if (Cond.empty()) return false;
-
+
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
return FBB == 0;
@@ -853,14 +979,14 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
/// fall-through to MBB1 than to fall through into MBB2. This has to return
/// a strict ordering; returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
/// result in infinite loops.
-static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
// into a fallthrough to the return.
if (MBB1->empty() || MBB2->empty()) return false;
-
+
// If there is a clear successor ordering we make sure that one block
// will fall through to the next
if (MBB1->isSuccessor(MBB2)) return true;
@@ -871,14 +997,153 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
}
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ // Only duplicate blocks that end with unconditional branches.
+ if (CanFallThrough(MBB))
+ continue;
+
+ MadeChange |= TailDuplicate(MBB, MF);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
+ MachineFunction &MF) {
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
+ // Set the limit on the number of instructions to duplicate, with a default
+ // of one less than the tail-merge threshold. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount =
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
+ 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1);
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned i = 0;
+ bool HasCall = false;
+ for (MachineBasicBlock::iterator I = TailBB->begin();
+ I != TailBB->end(); ++I, ++i) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable()) return false;
+ // Don't duplicate more than the threshold.
+ if (i == MaxDuplicateCount) return false;
+ // Remember if we saw a call.
+ if (I->getDesc().isCall()) HasCall = true;
+ }
+ // Heuristically, don't tail-duplicate calls if it would expand code size,
+ // as it's less likely to be worth the extra cost.
+ if (i > 1 && HasCall)
+ return false;
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ if (PredBB->succ_size() > 1) continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB))
+ continue;
+
+ DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+ // Clone the contents of TailBB into PredBB.
+ for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(I);
+ PredBB->insert(PredBB->end(), NewMI);
+ }
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ // This has to check PrevBB.succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+ TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *TailBB);
+ PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());;
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(TailBB);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
++FallThrough;
-
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
@@ -886,8 +1151,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
-
- if (FallThrough == MBB->getParent()->end()) {
+
+ if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
} else {
// Rewrite all predecessors of the old block to go to the fallthrough
@@ -898,8 +1163,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, FallThrough);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough);
MadeChange = true;
}
return MadeChange;
@@ -917,29 +1181,49 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
!PriorCond.empty());
-
+
// If the previous branch is conditional and both conditions go to the same
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
TII->RemoveBranch(PrevBB);
- PriorCond.clear();
+ PriorCond.clear();
if (PriorTBB != MBB)
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());;
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
if (PriorTBB == MBB && PriorFBB == 0) {
TII->RemoveBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
@@ -947,9 +1231,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches here on true and somewhere else on false, and
// if the branch condition is reversible, reverse the branch to create a
// fall-through.
@@ -960,10 +1244,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// If this block has no successors (e.g. it is a return block or ends with
// a call to a no-return function like abort or __cxa_throw) and if the pred
// falls through into this block, and if it would otherwise fall through
@@ -976,13 +1260,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator(PriorTBB) == FallThrough &&
!CanFallThrough(MBB)) {
bool DoTransform = true;
-
+
// We have to be careful that the succs of PredBB aren't both no-successor
// blocks. If neither have successors and if PredBB is the second from
// last block in the function, we'd just keep swapping the two blocks for
// last. Only do the swap if one is clearly better to fall through than
// the other.
- if (FallThrough == --MBB->getParent()->end() &&
+ if (FallThrough == --MF.end() &&
!IsBetterFallthrough(PriorTBB, MBB))
DoTransform = false;
@@ -1000,20 +1284,20 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (DoTransform && !MBB->succ_empty() &&
(!CanFallThrough(PriorTBB) || PriorTBB->empty()))
DoTransform = false;
-
-
+
+
if (DoTransform) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
DEBUG(errs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
-
+
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
// Move this block to the end of the function.
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
++NumBranchOpts;
return MadeChange;
@@ -1021,7 +1305,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
SmallVector<MachineOperand, 4> CurCond;
@@ -1030,7 +1314,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
- // If this is a two-way branch, and the FBB branches to this block, reverse
+ // If this is a two-way branch, and the FBB branches to this block, reverse
// the condition so the single-basic-block loop is faster. Instead of:
// Loop: xxx; jcc Out; jmp Loop
// we want:
@@ -1042,14 +1326,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
-
+
// If this branch is the only thing in its block, see if we can forward
// other blocks across it.
- if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
// This block may contain just an unconditional branch. Because there can
@@ -1068,7 +1351,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
!PrevBB.isSuccessor(MBB)) {
// If the prior block falls through into us, turn it into an
// explicit branch to us to make updates simpler.
- if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
PriorTBB != MBB && PriorFBB != MBB) {
if (PriorTBB == 0) {
assert(PriorCond.empty() && PriorFBB == 0 &&
@@ -1104,18 +1387,17 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
TII->RemoveBranch(*PMBB);
- NewCurCond.clear();
+ NewCurCond.clear();
TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
MadeChange = true;
++NumBranchOpts;
- PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
}
}
}
// Change any jumptables to go to the new MBB.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, CurTBB);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB);
if (DidChange) {
++NumBranchOpts;
MadeChange = true;
@@ -1123,7 +1405,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Add the branch back if the block is more than just an uncond branch.
TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
}
@@ -1134,9 +1416,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// place to move this block where a fall-through will happen.
if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
PriorTBB, PriorFBB, PriorCond)) {
+
// Now we know that there was no fall-through into this block, check to
// see if it has a fall-through into its successor.
- bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
CurCond);
if (!MBB->isLandingPad()) {
@@ -1147,12 +1430,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the pred.
MachineBasicBlock *PredBB = *PI;
MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
- if (PredBB != MBB && !CanFallThrough(PredBB)
+ MachineBasicBlock *PredTBB = 0, *PredFBB = 0; // AnalyzeBranch outputs; start null like the other call sites
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !CanFallThrough(PredBB) &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
&& (!CurFallsThru || !CurTBB || !CurFBB)
&& (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
// If the current block doesn't fall through, just move it.
// If the current block can fall through and does not end with a
- // conditional branch, we need to append an unconditional jump to
+ // conditional branch, we need to append an unconditional jump to
// the (current) next block. To avoid a possible compile-time
// infinite loop, move blocks only backward in this case.
// Also, if there are already 2 branches here, we cannot add a third;
@@ -1167,11 +1453,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
MBB->moveAfter(PredBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
}
-
+
if (!CurFallsThru) {
// Check all successors to see if we can move this block before it.
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
@@ -1179,26 +1465,29 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the block before the succ.
MachineBasicBlock *SuccBB = *SI;
MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
- std::vector<MachineOperand> SuccPrevCond;
-
+
// If this block doesn't already fall-through to that successor, and if
// the succ doesn't already have a block that can fall through into it,
// and if the successor isn't an EH destination, we can arrange for the
// fallthrough to happen.
- if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
!SuccBB->isLandingPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function.
- if (FallThrough != MBB->getParent()->end() &&
+ MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; // AnalyzeBranch outputs; start null like the other call sites
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(FallThrough)) {
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
return MadeChange;
}
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 9763e3339a20..4920755c227b 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -11,7 +11,6 @@
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include <vector>
namespace llvm {
@@ -20,6 +19,7 @@ namespace llvm {
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
+ template<typename T> class SmallVectorImpl;
class BranchFolder {
public:
@@ -30,11 +30,58 @@ namespace llvm {
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi);
private:
- typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
- typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
std::vector<SameTailElt> SameTails;
bool EnableTailMerge;
@@ -44,18 +91,23 @@ namespace llvm {
RegScavenger *RS;
bool TailMergeBlocks(MachineFunction &MF);
- bool TryMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1);
- unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
@@ -66,19 +118,6 @@ namespace llvm {
MachineBasicBlock *TBB, MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond);
};
-
-
- /// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
- public:
- static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
- };
}
#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 6fff12c0b0d5..e9844d84c17f 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -56,7 +56,6 @@ namespace {
MachineFunction::iterator InsertPt,
MachineFunction::iterator Begin,
MachineFunction::iterator End);
- void UpdateTerminator(MachineBasicBlock *MBB);
bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
MachineLoop *L);
bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
@@ -141,66 +140,9 @@ void CodePlacementOpt::Splice(MachineFunction &MF,
MF.splice(InsertPt, Begin, End);
- UpdateTerminator(prior(Begin));
- UpdateTerminator(OldBeginPrior);
- UpdateTerminator(OldEndPrior);
-}
-
-/// UpdateTerminator - Update the terminator instructions in MBB to account
-/// for changes to the layout. If the block previously used a fallthrough,
-/// it may now need a branch, and if it previously used branching it may now
-/// be able to use a fallthrough.
-///
-void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) {
- // A block with no successors has no concerns with fall-through edges.
- if (MBB->succ_empty()) return;
-
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
- (void) B;
- assert(!B && "UpdateTerminators requires analyzable predecessors!");
- if (Cond.empty()) {
- if (TBB) {
- // The block has an unconditional branch. If its successor is now
- // its layout successor, delete the branch.
- if (MBB->isLayoutSuccessor(TBB))
- TII->RemoveBranch(*MBB);
- } else {
- // The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *MBB->succ_begin();
- if (!MBB->isLayoutSuccessor(TBB))
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- if (FBB) {
- // The block has a non-fallthrough conditional branch. If one of its
- // successors is its layout successor, rewrite it to a fallthrough
- // conditional branch.
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, FBB, 0, Cond);
- } else if (MBB->isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- // The block has a fallthrough conditional branch.
- MachineBasicBlock *MBBA = *MBB->succ_begin();
- MachineBasicBlock *MBBB = *next(MBB->succ_begin());
- if (MBBA == TBB) std::swap(MBBB, MBBA);
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, MBBA, 0, Cond);
- } else if (!MBB->isLayoutSuccessor(MBBA)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, MBBA, Cond);
- }
- }
- }
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
}
/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3e3b28a8109b..8a3bd0bf4e00 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -515,6 +515,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
}
assert(CI->use_empty() &&
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 794ecf7bd193..23dce4a91a13 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,7 +55,10 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit &Pred = *I->getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
@@ -75,7 +78,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
unsigned NumNodesBlocking = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
if (getSingleUnscheduledPred(I->getSUnit()) == SU)
++NumNodesBlocking;
}
@@ -92,7 +98,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
}
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2a93a35b3faf..a60d34f58a91 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -53,7 +53,8 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
static cl::opt<bool> EnableFastSpilling("fast-spill",
cl::init(false), cl::Hidden);
-static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false));
+static cl::opt<bool> EarlyCoalescing("early-coalescing",
+ cl::init(false), cl::Hidden);
static cl::opt<int> CoalescingLimit("early-coalescing-limit",
cl::init(-1), cl::Hidden);
@@ -646,17 +647,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
0, false, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
-
+
interval.addRange(LR);
LR.valno->addKill(end);
DEBUG(errs() << " +" << LR << '\n');
}
-bool
-LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
- SmallVector<MachineInstr*,16> &IdentCopies,
- SmallVector<MachineInstr*,16> &OtherCopies) {
- bool HaveConflict = false;
+bool LiveIntervals::
+isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
+ LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies) {
unsigned NumIdent = 0;
for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg),
re = mri_->def_end(); ri != re; ++ri) {
@@ -665,16 +666,16 @@ LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
if (SrcReg != DstInt.reg) {
+ // Non-identity copy - we cannot handle overlapping intervals
+ if (DstInt.liveAt(getInstructionIndex(MI)))
+ return false;
OtherCopies.push_back(MI);
- HaveConflict |= DstInt.liveAt(getInstructionIndex(MI));
} else {
IdentCopies.push_back(MI);
++NumIdent;
}
}
- if (!HaveConflict)
- return false; // Let coalescer handle it
return IdentCopies.size() > OtherCopies.size();
}
@@ -701,19 +702,21 @@ void LiveIntervals::performEarlyCoalescing() {
LiveInterval &SrcInt = getInterval(PHISrc);
SmallVector<MachineInstr*, 16> IdentCopies;
SmallVector<MachineInstr*, 16> OtherCopies;
- if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies))
+ if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt,
+ IdentCopies, OtherCopies))
continue;
DEBUG(errs() << "PHI Join: " << *Join);
assert(DstInt.containsOneValue() && "PHI join should have just one val#!");
+ assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 &&
+ "PHI join src should not be used elsewhere");
VNInfo *VNI = DstInt.getValNumInfo(0);
// Change the non-identity copies to directly target the phi destination.
for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) {
MachineInstr *PHICopy = OtherCopies[i];
- DEBUG(errs() << "Moving: " << *PHICopy);
-
SlotIndex MIIndex = getInstructionIndex(PHICopy);
+ DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy);
SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
SlotIndex StartIndex = SLR->start;
@@ -724,8 +727,7 @@ void LiveIntervals::performEarlyCoalescing() {
SrcInt.removeValNo(SLR->valno);
DEBUG(errs() << " added range [" << StartIndex << ','
<< EndIndex << "] to reg" << DstInt.reg << '\n');
- if (DstInt.liveAt(StartIndex))
- DstInt.removeRange(StartIndex, EndIndex);
+ assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!");
VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true,
VNInfoAllocator);
NewVNI->setHasPHIKill(true);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 96c655c1a9b8..16a79bb54e97 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -50,6 +50,14 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
void LiveVariables::VarInfo::dump() const {
errs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
@@ -222,8 +230,9 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
- if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+ if (!LastDef && !PhysRegUse[Reg]) {
// Otherwise, the last sub-register def implicitly defines this register.
// e.g.
// AH =
@@ -257,6 +266,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
}
}
}
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
@@ -641,3 +655,36 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
.push_back(BBI->getOperand(i).getReg());
}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB) {
+ const unsigned NumNew = BB->getNumber();
+ const unsigned NumDom = DomBB->getNumber();
+
+ // Update the liveness info of every virtual register.
+ for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+ E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ VarInfo &VI = getVarInfo(Reg);
+
+ // Anything live through DomBB is also live through BB.
+ if (VI.AliveBlocks.test(NumDom)) {
+ VI.AliveBlocks.set(NumNew);
+ continue;
+ }
+
+ // Variables not defined in DomBB cannot be live out.
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def || Def->getParent() != DomBB)
+ continue;
+
+ // A kill recorded inside DomBB means the register is not live out of it.
+ if (VI.findKill(DomBB))
+ continue;
+
+ // This register is defined in DomBB and live out, so it is live through BB.
+ VI.AliveBlocks.set(NumNew);
+ }
+}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7fbdb128fd40..cd52825d21f1 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
@@ -242,6 +243,58 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++BBI, this);
}
+/// updateTerminator - Update the terminator instructions to account for layout changes.
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If a successor is
+ // its layout successor, rewrite it to a fallthrough conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ if (TII->ReverseBranchCondition(Cond)) return; // cannot reverse: keep as is
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, FBB, 0, Cond);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ // The block has a fallthrough conditional branch; MBBA is its non-TBB succ.
+ MachineBasicBlock *MBBA = *succ_begin();
+ if (MBBA == TBB) MBBA = *next(succ_begin());
+ if (isLayoutSuccessor(TBB)) {
+ // If the condition cannot be reversed, keep it and add a jump to MBBA.
+ if (TII->ReverseBranchCondition(Cond)) Cond.clear();
+ else TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond);
+ }
+ }
+ }
+}
void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
Successors.push_back(succ);
@@ -371,10 +424,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock::succ_iterator SI = succ_begin();
MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
while (SI != succ_end()) {
- if (*SI == DestA && DestA == DestB) {
- DestA = DestB = 0;
- ++SI;
- } else if (*SI == DestA) {
+ if (*SI == DestA) {
DestA = 0;
++SI;
} else if (*SI == DestB) {
@@ -397,3 +447,8 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
}
return MadeChange;
}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5a1d9e687903..81d1301336b8 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -441,9 +441,10 @@ DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable) {
+ bool Immutable, bool isSS) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
+ isSS));
return -++NumFixedObjects;
}
@@ -529,10 +530,6 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const {
unsigned MachineJumpTableInfo::getJumpTableIndex(
const std::vector<MachineBasicBlock*> &DestBBs) {
assert(!DestBBs.empty() && "Cannot create an empty jump table!");
- for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
- if (JumpTables[i].MBBs == DestBBs)
- return i;
-
JumpTables.push_back(MachineJumpTableEntry(DestBBs));
return JumpTables.size()-1;
}
@@ -544,14 +541,25 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Not making a change?");
bool MadeChange = false;
- for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
- MachineJumpTableEntry &JTE = JumpTables[i];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
- MadeChange = true;
- }
- }
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New); // record whether any table changed
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
return MadeChange;
}
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 56294d90398f..f5febc5a4ca4 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -24,7 +24,7 @@ X("Machine Function Analysis", "machine-function-analysis",
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm,
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5744c8a54552..b250faa62ae6 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -189,19 +189,19 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
switch (getType()) {
case MachineOperand::MO_Register:
if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
OS << "%reg" << getReg();
} else {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (TM == 0)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
-
if (TM)
OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
else
@@ -265,7 +265,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "<jt#" << getIndex() << '>';
break;
case MachineOperand::MO_GlobalAddress:
- OS << "<ga:" << ((Value*)getGlobal())->getName();
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
@@ -375,7 +376,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
/// TID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -395,7 +396,8 @@ void MachineInstr::addImplicitDefUseOperands() {
/// TargetInstrDesc or the numOperands if it is not zero. (for
/// instructions with variable number of operands).
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -413,7 +415,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -433,7 +435,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// basic block.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -453,7 +456,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -472,7 +475,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0),
+ : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -1060,9 +1063,16 @@ void MachineInstr::dump() const {
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
- unsigned StartOp = 0, e = getNumOperands();
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ }
// Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
getOperand(StartOp).isDef() &&
!getOperand(StartOp).isImplicit();
@@ -1078,11 +1088,45 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << getDesc().getName();
// Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
- if (i != StartOp)
- OS << ",";
+ const MachineOperand &MO = getOperand(i);
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- getOperand(i).print(OS, TM);
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ...";
}
bool HaveSemi = false;
@@ -1098,12 +1142,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
}
- if (!debugLoc.isUnknown()) {
+ if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
// TODO: print InlinedAtLoc information
- const MachineFunction *MF = getParent()->getParent();
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
DICompileUnit CU(DLT.Scope);
if (!CU.isNull())
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index de3ab273b39a..33b6b823446e 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -43,6 +44,7 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
namespace {
class MachineLICM : public MachineFunctionPass {
+ MachineConstantPool *MCP;
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -111,6 +113,11 @@ namespace {
/// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
/// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
/// the preheader that computes the same value. If it's found, do a RAUW
/// with the definition of the existing instruction rather than hoisting
@@ -153,6 +160,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "******** Machine LICM ********\n");
Changed = FirstInLoop = false;
+ MCP = MF.getConstantPool();
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -234,9 +242,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
if (!I.isInvariantLoad(AA))
- // FIXME: we should be able to sink loads with no other side effects if
- // there is nothing that can change memory from here until the end of
- // block. This is a trivial form of alias analysis.
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
return false;
}
@@ -432,32 +440,12 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}
-static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs,
- MachineRegisterInfo *RegInfo) {
- unsigned NumOps = MI->getNumOperands();
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- unsigned NumOps2 = PrevMI->getNumOperands();
- if (NumOps != NumOps2)
- continue;
- bool IsSame = true;
- for (unsigned j = 0; j != NumOps; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
- if (MO.isReg() && MO.isDef()) {
- if (RegInfo->getRegClass(MO.getReg()) !=
- RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
- IsSame = false;
- break;
- }
- continue;
- }
- if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
- IsSame = false;
- break;
- }
- }
- if (IsSame)
+ if (TII->isIdentical(MI, PrevMI, RegInfo))
return PrevMI;
}
return 0;
@@ -465,18 +453,19 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
- if (CI != CSEMap.end()) {
- if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) {
- DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef())
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- }
- MI->eraseFromParent();
- ++NumCSEed;
- return true;
+ if (CI == CSEMap.end())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
}
return false;
}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index b62803f105e4..4b067a0aa98b 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -76,9 +76,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
VariableDbgInfo.clear();
-#endif
}
/// AnalyzeModule - Scan the module for global debug information.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 99812e0aa8a2..be9f68f6a725 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -175,6 +175,10 @@ FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
return new MachineVerifier(allowPhysDoubleDefs);
}
+void MachineFunction::verify() const {
+ MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
raw_ostream *OutFile = 0;
if (OutFileName) {
@@ -287,7 +291,18 @@ void MachineVerifier::visitMachineFunctionBefore() {
markReachable(&MF->front());
}
-void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+// Does iterator point to a and b as the first two elements?
+bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
// Start with minimal CFG sanity checks.
@@ -379,8 +394,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
} if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
report("MBB exits via conditional branch/fall-through but the CFG "
"successors don't match the actual successors!", MBB);
}
@@ -400,8 +414,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
report("MBB exits via conditional branch/branch but the CFG "
"successors don't match the actual successors!", MBB);
}
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 8071b0a81a89..cd38dd144c58 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -15,24 +15,32 @@
#define DEBUG_TYPE "phielim"
#include "PHIElimination.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Function.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
#include <map>
using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumSplits, "Number of critical edges split on demand");
+
+static cl::opt<bool>
+SplitEdges("split-phi-edges",
+ cl::desc("Split critical edges during phi elimination"),
+ cl::init(false), cl::Hidden);
char PHIElimination::ID = 0;
static RegisterPass<PHIElimination>
@@ -40,11 +48,26 @@ X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
const PassInfo *const llvm::PHIEliminationID = &X;
+namespace llvm { FunctionPass *createLocalRegisterAllocator(); }
+
+// Should we run edge splitting?
+static bool shouldSplitEdges() {
+ // Edge splitting breaks the local register allocator. It cannot tolerate
+ // LiveVariables being run.
+ if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator)
+ return false;
+ return SplitEdges;
+}
+
void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<MachineDominatorTree>();
+ if (shouldSplitEdges()) {
+ AU.addRequired<LiveVariables>();
+ } else {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ }
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -53,10 +76,16 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
PHIDefs.clear();
PHIKills.clear();
- analyzePHINodes(Fn);
-
bool Changed = false;
+ // Split critical edges to help the coalescer
+ if (shouldSplitEdges())
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(Fn, *I);
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(Fn);
+
// Eliminate PHI instructions by inserting copies into predecessor blocks.
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
Changed |= EliminatePHINodes(Fn, *I);
@@ -75,7 +104,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
return Changed;
}
-
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
@@ -107,26 +135,28 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
-// This needs to be after any def or uses of SrcReg, but before any subsequent
-// point where control flow might jump out of the basic block.
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg) {
// Handle the trivial case trivially.
if (MBB.empty())
return MBB.begin();
- // If this basic block does not contain an invoke, then control flow always
- // reaches the end of it, so place the copy there. The logic below works in
- // this case too, but is more expensive.
- if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB.isLandingPad())
return MBB.getFirstTerminator();
- // Discover any definition/uses in this basic block.
+ // Discover any defs/uses in this basic block.
SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
+ RE = MRI->reg_end(); RI != RE; ++RI) {
MachineInstr *DefUseMI = &*RI;
if (DefUseMI->getParent() == &MBB)
DefUsesInMBB.insert(DefUseMI);
@@ -134,14 +164,14 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPoint;
if (DefUsesInMBB.empty()) {
- // No def/uses. Insert the copy at the start of the basic block.
+ // No defs. Insert the copy at the start of the basic block.
InsertPoint = MBB.begin();
} else if (DefUsesInMBB.size() == 1) {
- // Insert the copy immediately after the definition/use.
+ // Insert the copy immediately after the def/use.
InsertPoint = *DefUsesInMBB.begin();
++InsertPoint;
} else {
- // Insert the copy immediately after the last definition/use.
+ // Insert the copy immediately after the last def/use.
InsertPoint = MBB.end();
while (!DefUsesInMBB.count(&*--InsertPoint)) {}
++InsertPoint;
@@ -155,7 +185,7 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
-///
+///
void llvm::PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
@@ -186,7 +216,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
// Record PHI def.
- assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+ assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
PHIDefs[DestReg] = &MBB;
// Update live variable information if there is any.
@@ -250,92 +280,35 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// basic block.
if (!MBBsInsertedInto.insert(&opBlock))
continue; // If the copy has already been emitted, we're done.
-
+
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
- MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+ MachineBasicBlock::iterator InsertPos =
+ FindCopyInsertPoint(opBlock, MBB, SrcReg);
// Insert the copy.
TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
-
+
// We want to be able to insert a kill of the register if this PHI (aka, the
// copy we just inserted) is the last use of the source value. Live
// variable analysis conservatively handles this by saying that the value is
// live until the end of the block the PHI entry lives in. If the value
// really is dead at the PHI copy, there will be no successor blocks which
// have the value live-in.
- //
- // Check to see if the copy is the last use, and if so, update the live
- // variables information so that it knows the copy source instruction kills
- // the incoming value.
- LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
-
- // Loop over all of the successors of the basic block, checking to see if
- // the value is either live in the block, or if it is killed in the block.
+
// Also check to see if this register is in use by another PHI node which
// has not yet been eliminated. If so, it will be killed at an appropriate
// point later.
// Is it used by any PHI instructions in this block?
- bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
-
- std::vector<MachineBasicBlock*> OpSuccBlocks;
-
- // Otherwise, scan successors, including the BB the PHI node lives in.
- for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
- E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
-
- // Is it alive in this successor?
- unsigned SuccIdx = SuccMBB->getNumber();
- if (InRegVI.AliveBlocks.test(SuccIdx)) {
- ValueIsLive = true;
- break;
- }
-
- OpSuccBlocks.push_back(SuccMBB);
- }
-
- // Check to see if this value is live because there is a use in a successor
- // that kills it.
- if (!ValueIsLive) {
- switch (OpSuccBlocks.size()) {
- case 1: {
- MachineBasicBlock *MBB = OpSuccBlocks[0];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- case 2: {
- MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB1 ||
- InRegVI.Kills[i]->getParent() == MBB2) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- default:
- std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
- InRegVI.Kills[i]->getParent())) {
- ValueIsLive = true;
- break;
- }
- }
- }
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
// Okay, if we now know that the value is not live out of the block, we can
// add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsLive) {
+ if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, the first
// terminator instruction at the end of the block may also use the value.
@@ -346,7 +319,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
if (Term != opBlock.end()) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
-
+
// Check that no other terminators use values.
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
@@ -357,16 +330,16 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
#endif
}
-
+
// Finally, mark it killed.
LV->addVirtualRegisterKilled(SrcReg, KillInst);
// This vreg no longer lives all of the way through opBlock.
unsigned opBlockNum = opBlock.getNumber();
- InRegVI.AliveBlocks.reset(opBlockNum);
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
}
}
-
+
// Really delete the PHI instruction now!
MF.DeleteMachineInstr(MPhi);
++NumAtomic;
@@ -386,3 +359,134 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
BBI->getOperand(i).getReg())];
}
+
+bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+ LiveVariables &LV = getAnalysis<LiveVariables>();
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live in to this block
+ // anyway, we would gain nothing from splitting.
+ if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV))
+ SplitCriticalEdge(PreMBB, &MBB);
+ }
+ }
+ return true;
+}
+
+bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ if (VI.AliveBlocks.test(MBB.getNumber()))
+ return true;
+
+ // defined in MBB?
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // killed in MBB?
+ return VI.findKill(&MBB);
+}
+
+MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
+ assert(A && B && "Missing MBB end point");
+
+ MachineFunction *MF = A->getParent();
+
+ // We may need to update A's terminator, but we can't do that if AnalyzeBranch
+ // fails. If A uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
+ return NULL;
+
+ ++NumSplits;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->push_back(NMBB);
+ DEBUG(errs() << "PHIElimination splitting critical edge:"
+ " BB#" << A->getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << B->getNumber() << '\n');
+
+ A->ReplaceUsesOfBlockWith(B, NMBB);
+ // If A may fall through to B, we may have to insert a branch.
+ if (A->isLayoutSuccessor(B))
+ A->updateTerminator();
+
+ // Insert unconditional "jump B" instruction in NMBB.
+ NMBB->addSuccessor(B);
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+
+ // Fix PHI nodes in B so they refer to NMBB instead of A
+ for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
+ i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == A)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, A);
+
+ if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, A);
+
+ return NMBB;
+}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 3d02dfdcddba..94716eef6c51 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -89,11 +89,33 @@ namespace llvm {
///
void analyzePHINodes(const MachineFunction& Fn);
- // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- // SrcReg. This needs to be after any def or uses of SrcReg, but before
- // any subsequent point where control flow might jump out of the basic
- // block.
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ /// isLiveOut - Determine if Reg is live out from MBB, when not
+ /// considering PHI nodes. This means that Reg is either killed by
+ /// a successor block or passed through one.
+ bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI
+ /// source registers. This means that Reg is either killed by MBB or passes
+ /// through it.
+ bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// SplitCriticalEdge - Split a critical edge from A to B by
+ /// inserting a new MBB. Update branches in A and PHI instructions
+ /// in B. Return the new block.
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B);
+
+ /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg);
// SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 3ed61a267f77..5f1f1f3580c1 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -216,13 +216,14 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
}
@@ -243,7 +244,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) :
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
(AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
@@ -602,7 +603,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
ReleaseSucc(SU, &*I, IgnoreAntiDep);
}
}
@@ -657,7 +660,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
available = true;
for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
E = SUnits[i].Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti) {
+ if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) {
available = false;
} else {
SUnits[i].NumPredsLeft -= 1;
@@ -736,7 +739,9 @@ void SchedulePostRATDList::ListScheduleTopDown(
AntiDepBreaker::AntiDepRegVector AntiDepRegs;
for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
E = FoundSUnit->Preds.end(); I != E; ++I) {
- if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled)
+ if (((I->getKind() == SDep::Anti) ||
+ (I->getKind() == SDep::Output)) &&
+ !I->getSUnit()->isScheduled)
AntiDepRegs.push_back(I->getReg());
}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index cce5ae817a20..8f623452e27a 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -39,8 +39,10 @@
using namespace llvm;
static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+ cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+ cl::Hidden);
STATISTIC(NumSplits, "Number of intervals split");
STATISTIC(NumRemats, "Number of intervals split by rematerialization");
@@ -131,17 +133,14 @@ namespace {
private:
- MachineBasicBlock::iterator
- findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
- SlotIndex&);
MachineBasicBlock::iterator
findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
MachineBasicBlock::iterator
findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
@@ -161,7 +160,6 @@ namespace {
bool Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
MachineInstr* DefMI,
@@ -208,24 +206,6 @@ X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
const PassInfo *const llvm::PreAllocSplittingID = &X;
-
-/// findNextEmptySlot - Find a gap after the given machine instruction in the
-/// instruction index map. If there isn't one, return end().
-MachineBasicBlock::iterator
-PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
- SlotIndex &SpotIndex) {
- MachineBasicBlock::iterator MII = MI;
- if (++MII != MBB->end()) {
- SlotIndex Index =
- LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
- if (Index != SlotIndex()) {
- SpotIndex = Index;
- return MII;
- }
- }
- return MBB->end();
-}
-
/// findSpillPoint - Find a gap as far away from the given MI that's suitable
/// for spilling the current live interval. The index must be before any
/// defs and uses of the live interval register in the mbb. Return begin() if
@@ -233,8 +213,7 @@ PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineInstr *DefMI,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &SpillIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
MachineBasicBlock::iterator Pt = MBB->begin();
MachineBasicBlock::iterator MII = MI;
@@ -247,8 +226,6 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
while (MII != EndPt && !RefsInMBB.count(MII)) {
- SlotIndex Index = LIs->getInstructionIndex(MII);
-
// We can't insert the spill between the barrier (a call), and its
// corresponding call frame setup.
if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
@@ -259,9 +236,8 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
}
}
continue;
- } else if (LIs->hasGapBeforeInstr(Index)) {
+ } else {
Pt = MII;
- SpillIndex = LIs->findGapBeforeInstr(Index, true);
}
if (RefsInMBB.count(MII))
@@ -281,8 +257,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex LastIdx,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &RestoreIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
// FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
// begin index accordingly.
MachineBasicBlock::iterator Pt = MBB->end();
@@ -306,7 +281,6 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex Index = LIs->getInstructionIndex(MII);
if (Index > LastIdx)
break;
- SlotIndex Gap = LIs->findGapBeforeInstr(Index);
// We can't insert a restore between the barrier (a call) and its
// corresponding call frame teardown.
@@ -315,9 +289,8 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
++MII;
} while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else if (Gap != SlotIndex()) {
+ } else {
Pt = MII;
- RestoreIndex = Gap;
}
if (RefsInMBB.count(MII))
@@ -339,7 +312,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
IntervalSSMap[Reg] = SS;
}
@@ -364,10 +337,10 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
if (!DefMBB)
return false;
- DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
if (I == IntervalSSMap.end())
return false;
- DenseMap<SlotIndex, SlotIndex>::iterator
+ DenseMap<SlotIndex, SlotIndex>::const_iterator
II = Def2SpillMap.find(DefIndex);
if (II == Def2SpillMap.end())
return false;
@@ -740,7 +713,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DefIdx = DefIdx.getDefIndex();
assert(DI->getOpcode() != TargetInstrInfo::PHI &&
- "Following NewVN isPHIDef flag incorrect. Fix me!");
+ "PHI instr in code during pre-alloc splitting.");
VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
@@ -896,25 +869,22 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- SlotIndex KillIdx;
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
- KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
- KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+ KillPt = next(MachineBasicBlock::iterator(DefMI));
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI);
- LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
ReconstructLiveInterval(CurrLI);
- SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
RematIdx = RematIdx.getDefIndex();
RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
@@ -955,7 +925,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
}
MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
@@ -1086,17 +1056,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Find a point to restore the value after the barrier.
- SlotIndex RestoreIndex;
MachineBasicBlock::iterator RestorePt =
- findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
if (RestorePt == BarrierMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
return false;
}
if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
- if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
- RestoreIndex, RefsInMBB)) {
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
DEBUG(errs() << "success (remat).\n");
return true;
}
@@ -1114,7 +1082,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
} else {
MachineBasicBlock::iterator SpillPt =
- findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
if (SpillPt == BarrierMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1124,10 +1092,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
} else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
- RestoreIndex, SpillIndex, SS)) {
+ LIs->getZeroIndex(), SpillIndex, SS)) {
// If it's already split, just restore the value. There is no need to spill
// the def again.
if (!DefMI) {
@@ -1144,13 +1112,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
if (DefMBB == BarrierMBB) {
// Add spill after the def and the last use before the barrier.
SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
- RefsInMBB, SpillIndex);
+ RefsInMBB);
if (SpillPt == DefMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
} else {
- SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
+ SpillPt = next(MachineBasicBlock::iterator(DefMI));
if (SpillPt == DefMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1160,7 +1128,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
}
@@ -1170,6 +1138,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add restore.
bool FoldedRestore = false;
+ SlotIndex RestoreIndex;
if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
BarrierMBB, SS, RefsInMBB)) {
RestorePt = LMI;
@@ -1178,7 +1147,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
} else {
TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
MachineInstr *LoadMI = prior(RestorePt);
- LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex);
+ RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
}
// Update spill stack slot live interval.
@@ -1398,7 +1367,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// Otherwise, this is a load-store case, so DCE them.
for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
- UI != UI; ++UI) {
+ UI != UE; ++UI) {
LIs->RemoveMachineInstrFromMaps(*UI);
(*UI)->eraseFromParent();
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 48567a0fc7f6..455964b5c5ad 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -77,6 +77,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
SmallVector<MachineInstr*, 8> ImpDefMIs;
MachineBasicBlock *Entry = fn.begin();
SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
@@ -201,6 +202,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineOperand &RMO = UI.getOperand();
MachineInstr *RMI = &*UI;
++UI;
+ if (ModInsts.count(RMI))
+ continue;
MachineBasicBlock *RMBB = RMI->getParent();
if (RMBB == MBB)
continue;
@@ -209,9 +212,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
Reg == SrcReg) {
+ if (RMO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
RMI->RemoveOperand(j);
+ ModInsts.insert(RMI);
continue;
}
@@ -222,6 +230,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
RMO.setIsKill();
}
}
+ ModInsts.clear();
ImpDefRegs.clear();
ImpDefMIs.clear();
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 230a20c2a3c8..8905f757a073 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -264,7 +264,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset);
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+ true, false);
}
I->setFrameIdx(FrameIdx);
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 5507646878cb..7fb3e6e6d2d2 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -43,35 +43,14 @@ static const char *const PSVNames[] = {
// Eventually these should be uniqued on LLVMContext rather than in a managed
// static. For now, we can safely use the global context for the time being to
// squeak by.
-PseudoSourceValue::PseudoSourceValue() :
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
Value(Type::getInt8PtrTy(getGlobalContext()),
- PseudoSourceValueVal) {}
+ Subclass) {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
O << PSVNames[this - *PSVs];
}
-namespace {
- /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
- /// for holding FixedStack values, which must include a frame
- /// index.
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
- const int FI;
- public:
- explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
-
- virtual bool isConstant(const MachineFrameInfo *MFI) const;
-
- virtual bool isAliased(const MachineFrameInfo *MFI) const;
-
- virtual bool mayAlias(const MachineFrameInfo *) const;
-
- virtual void printCustom(raw_ostream &OS) const {
- OS << "FixedStack" << FI;
- }
- };
-}
-
static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
@@ -130,3 +109,7 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
// Spill slots will not alias any LLVM IR value.
return !MFI->isSpillSlotObjectIndex(FI);
}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 1957c16a2fd5..7bb020a65e7e 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -261,8 +261,8 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(),true);
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
// Assign the slot...
StackSlotForVirtReg[VirtReg] = FrameIdx;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 5757e4755469..c677d341bef9 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -693,6 +693,11 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
}
bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
+
+ // Assert that this is a valid solution to the regalloc problem.
+ assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ "Invalid (infinite cost) solution for PBQP problem.");
+
// Set to true if we have any spills
bool anotherRoundNeeded = false;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index cf90aba86b30..94680ed29921 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -100,11 +100,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
CalleeSavedRegs.set(CSRegs[i]);
}
- // RS used within emit{Pro,Epi}logue()
- if (mbb != MBB) {
- MBB = mbb;
- initRegState();
- }
+ MBB = mbb;
+ initRegState();
Tracking = false;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1363a92fed67..6b27db263b25 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -214,7 +214,10 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
@@ -248,7 +251,10 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) {
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index f8b219d64133..56dd53345996 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -112,12 +112,13 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
V = getUnderlyingObject(V);
if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- MayAlias = PSV->mayAlias(MFI);
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
if (PSV->isAliased(MFI))
return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
return V;
}
@@ -127,23 +128,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI,
- const MachineFrameInfo *MFI) {
- if (!MI->hasOneMemOperand() ||
- !(*MI->memoperands_begin())->getValue() ||
- (*MI->memoperands_begin())->isVolatile())
- return true;
-
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return true;
-
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
- return PSV->mayAlias(MFI);
- return true;
-}
-
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
@@ -163,16 +147,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We build scheduling units by walking a block's instruction list from bottom
// to top.
- // Remember where a generic side-effecting instruction is as we procede. If
- // ChainMMO is null, this is assumed to have arbitrary side-effects. If
- // ChainMMO is non-null, then Chain makes only a single memory reference.
- SUnit *Chain = 0;
- MachineMemOperand *ChainMMO = 0;
+ // Remember where a generic side-effecting instruction is as we procede.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
- // Memory references to specific known memory locations are tracked so that
- // they can be given more precise dependencies.
- std::map<const Value *, SUnit *> MemDefs;
- std::map<const Value *, std::vector<SUnit *> > MemUses;
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -347,114 +330,132 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
- new_chain:
- // This is the conservative case. Add dependencies on all memory
- // references.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- Chain = SU;
+ if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references though SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- PendingLoads.clear();
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- I->second = SU;
}
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
- MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- I->second.clear();
- I->second.push_back(SU);
}
- // See if it is known to just have a single memory reference.
- MachineInstr *ChainMI = Chain->getInstr();
- const TargetInstrDesc &ChainTID = ChainMI->getDesc();
- if (!ChainTID.isCall() &&
- !ChainTID.hasUnmodeledSideEffects() &&
- ChainMI->hasOneMemOperand() &&
- !(*ChainMI->memoperands_begin())->isVolatile() &&
- (*ChainMI->memoperands_begin())->getValue())
- // We know that the Chain accesses one specific memory location.
- ChainMMO = *ChainMI->memoperands_begin();
- else
- // Unknown memory accesses. Assume the worst.
- ChainMMO = 0;
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
} else if (TID.mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
- // Handle the def in MemDefs, if there is one.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end()) {
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
I->second = SU;
} else {
- MemDefs[V] = SU;
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
std::map<const Value *, std::vector<SUnit *> >::iterator J =
- MemUses.find(V);
- if (J != MemUses.end()) {
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
/*Reg=*/0, /*isNormalMemory=*/true));
J->second.clear();
}
if (MayAlias) {
- // Add dependencies from all the PendingLoads, since without
- // memoperands we must assume they alias anything.
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- // Add a general dependence too, if needed.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else {
// Treat all other stores conservatively.
- goto new_chain;
+ goto new_alias_chain;
}
} else if (TID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
- } else if (const Value *V =
- getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end())
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
- MemUses[V].push_back(SU);
-
- // Add a general dependence too, if needed.
- if (Chain && (!ChainMMO ||
- (ChainMMO->isStore() || ChainMMO->isVolatile())))
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- } else if (MI->hasVolatileMemoryRef()) {
- // Treat volatile loads conservatively. Note that this includes
- // cases where memoperand information is unavailable.
- goto new_chain;
} else {
- // A "MayAlias" load. Depend on the general chain, as well as on
- // all stores. In the absense of MachineMemOperand information,
- // we can't even assume that the load doesn't alias well-behaved
- // memory locations.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
- SUnit *DefSU = I->second;
- if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI))
- DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
}
- PendingLoads.push_back(SU);
- }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
}
}
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index fbe40b678639..38839c44131a 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -77,6 +77,21 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
}
}
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
+ EVT VT = OutTys[i];
+ ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
/// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5f70cb85d923..06ffdd63881f 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
@@ -4443,14 +4442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // never taken branch, fold to chain
- if (N1C && N1C->isNullValue())
- return Chain;
- // unconditional branch
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
@@ -4517,22 +4515,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+ // If N is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
- // fold br_cc true, dest -> br dest (unconditional branch)
- if (SCCC && !SCCC->isNullValue())
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
- N->getOperand(0), N->getOperand(4));
- // fold br_cc false, dest -> unconditional fall through
- if (SCCC && SCCC->isNullValue())
- return N->getOperand(0);
-
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8e955aff98fe..7dbc136f3a62 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -324,82 +325,12 @@ bool FastISel::SelectCall(User *I) {
unsigned IID = F->getIntrinsicID();
switch (IID) {
default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None))
- setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo()));
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return true;
- }
- case Intrinsic::dbg_region_start: {
- DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID =
- DW->RecordRegionStart(RSI->getContext());
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(ID);
- }
- return true;
- }
- case Intrinsic::dbg_region_end: {
- DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
- if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID = 0;
- DISubprogram Subprogram(REI->getContext());
- if (isInlinedFnEnd(*REI, MF.getFunction())) {
- // This is end of an inlined function.
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- BuildMI(MBB, DL, II).addImm(ID);
- } else {
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordRegionEnd(REI->getContext());
- BuildMI(MBB, DL, II).addImm(ID);
- }
- }
- return true;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return true;
-
- if (isInlinedFnStart(*FSI, MF.getFunction())) {
- // This is a beginning of an inlined function.
-
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- DebugLoc PrevLoc = DL;
- if (PrevLoc.isUnknown())
- return true;
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI->getSubprogram());
- unsigned LabelID =
- DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope),
- PrevLocTpl.Line, PrevLocTpl.Col);
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(LabelID);
- return true;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI->getSubprogram());
- return true;
- }
case Intrinsic::dbg_declare: {
DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW
@@ -416,11 +347,13 @@ bool FastISel::SelectCall(User *I) {
StaticAllocaMap.find(AI);
if (SI == StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
- if (MMI)
- MMI->setVariableDbgInfo(DI->getVariable(), FI);
-#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->RecordVariable(DI->getVariable(), FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI->getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, DI);
+ MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+ }
return true;
}
case Intrinsic::eh_exception: {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index da311eddcae9..52b0832b0616 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -497,7 +497,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
-/// EmitNode - Generate machine code for an node and needed dependencies.
+/// EmitNode - Generate machine code for a node and needed dependencies.
///
void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index bb4634d04b2a..91817e4d38a4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -97,7 +97,7 @@ public:
/// MachineInstr.
static unsigned CountOperands(SDNode *Node);
- /// EmitNode - Generate machine code for an node and needed dependencies.
+ /// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f389f7f0c9df..4f0a229a505e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -148,8 +148,11 @@ private:
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
@@ -1810,10 +1813,19 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
} else if (ConstantSDNode *V =
dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ if (OpVT==EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext());
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -1909,6 +1921,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
@@ -1916,9 +1929,10 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i16: LC = Call_I16; break;
- case MVT::i32: LC = Call_I32; break;
- case MVT::i64: LC = Call_I64; break;
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
return ExpandLibCall(LC, Node, isSigned);
@@ -2624,10 +2638,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
} else if (isSigned) {
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
} else {
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
}
Results.push_back(Tmp1);
@@ -2643,10 +2661,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
RTLIB::SDIV_I64, RTLIB::SDIV_I128);
else
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
RTLIB::UDIV_I64, RTLIB::UDIV_I128);
Results.push_back(Tmp1);
break;
@@ -2691,7 +2713,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(1)));
break;
}
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128);
Results.push_back(Tmp1);
break;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 98e7317b493a..4530ffc4a2d0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1270,11 +1270,12 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
return Val;
FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
- SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1378,7 +1379,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -1394,7 +1395,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
TD->getPrefTypeAlignment(Ty2));
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -5814,9 +5815,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
print_types(OS, G);
- OS << " ";
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", ";
+ if (i) OS << ", "; else OS << " ";
OS << (void*)getOperand(i).getNode();
if (unsigned RN = getOperand(i).getResNo())
OS << ":" << RN;
@@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
- unsigned MinSplatBits) {
+ unsigned MinSplatBits,
+ bool isBigEndian) {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned sz = VT.getSizeInBits();
@@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned int nOps = getNumOperands();
assert(nOps > 0 && "isConstantSplat has 0-size build vector");
unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
- for (unsigned i = 0; i < nOps; ++i) {
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
SDValue OpVal = getOperand(i);
- unsigned BitPos = i * EltBitSize;
+ unsigned BitPos = j * EltBitSize;
if (OpVal.getOpcode() == ISD::UNDEF)
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize);
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index c0d2a4d39a32..90fd95eb6352 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -26,6 +26,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -304,7 +305,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
}
for (; BB != EB; ++BB)
@@ -334,25 +335,6 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
DebugLoc DL;
for (BasicBlock::iterator
I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction()) {
- switch (F->getIntrinsicID()) {
- default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo());
- break;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo());
- break;
- }
- }
- }
- }
PN = dyn_cast<PHINode>(I);
if (!PN || PN->use_empty()) continue;
@@ -947,58 +929,143 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
+/// Get the EVTs and ArgFlags collections that represent the return type
+/// of the given function. This does not require a DAG or a return value, and
+/// is suitable for use before any DAGs for the function are constructed.
+static void getReturnInfo(const Type* ReturnType,
+ Attributes attr, SmallVectorImpl<EVT> &OutVTs, // attr: tested for SExt/ZExt/InReg; OutVTs: appended to, never cleared
+ SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, // appended in lockstep with OutVTs
+ TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets = 0) { // optional; only forwarded to ComputeValueVTs
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if ( NumValues == 0 ) return; // nothing to describe; outputs are left untouched
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; // here only used to decide whether VT gets promoted below
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT)) // only integers narrower than MinVT are widened
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any (SExt takes precedence when both bits are set)
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) { // one (PartVT, Flags) entry per register part of this value
+ OutVTs.push_back(PartVT);
+ OutFlags.push_back(Flags);
+ }
+ }
+}
void SelectionDAGLowering::visitRet(ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ if (!FLI.CanLowerReturn) {
+ unsigned DemoteReg = FLI.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) continue;
-
- SDValue RetOp = getValue(I.getOperand(i));
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- const Function *F = I.getParent()->getParent();
- if (F->paramHasAttr(0, Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (F->paramHasAttr(0, Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = PtrValueVTs[0];
+ for (unsigned i = 0; i != NumValues; ++i)
+ Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 0);
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ }
+ else {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (F->paramHasAttr(0, Attribute::SExt))
+ Flags.setSExt();
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
-
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
- SmallVector<SDValue, 4> Parts(NumParts);
- getCopyToParts(DAG, getCurDebugLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->paramHasAttr(0, Attribute::InReg))
- Flags.setInReg();
-
- // Propagate extension type if any
- if (F->paramHasAttr(0, Attribute::SExt))
- Flags.setSExt();
- else if (F->paramHasAttr(0, Attribute::ZExt))
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
}
@@ -1691,19 +1758,19 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize <= 3)
+ if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
return false;
APInt Range = ComputeRange(First, Last);
- double Density = (double)TSize / Range.roundToDouble();
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
if (Density < 0.4)
return false;
@@ -1797,32 +1864,34 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
double FMetric = 0;
CaseItr Pivot = CR.Range.first + Size/2;
// Select optimal pivot, maximizing sum density of LHS and RHS. This will
// (heuristically) allow us to emit JumpTable's later.
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- size_t LSize = FrontCase.size();
- size_t RSize = TSize-LSize;
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
DEBUG(errs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
- const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+ double LDensity = (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
DEBUG(errs() <<"=>Step\n"
@@ -3842,112 +3911,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
}
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) {
- MachineFunction &MF = DAG.getMachineFunction();
- DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo());
- setCurDebugLoc(Loc);
-
- if (OptLevel == CodeGenOpt::None)
- DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
- SPI.getLine(),
- SPI.getColumn(),
- SPI.getContext()));
- }
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return 0;
- }
- case Intrinsic::dbg_region_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned LabelID =
- DW->RecordRegionStart(RSI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- }
- return 0;
- }
- case Intrinsic::dbg_region_end: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
-
- if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- DISubprogram Subprogram(REI.getContext());
-
- if (isInlinedFnEnd(REI, MF.getFunction())) {
- // This is end of inlined function. Debugging information for inlined
- // function is not handled yet (only supported by FastISel).
- if (OptLevel == CodeGenOpt::None) {
- unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID != 0)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics or
- // "beginning of inlined scope" is not recoginized due to missing
- // location info. In such cases, do ignore this region.end.
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), ID));
- }
- return 0;
- }
-
- unsigned LabelID =
- DW->RecordRegionEnd(REI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
- case Intrinsic::dbg_func_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None))
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- // This is a beginning of an inlined function.
- if (isInlinedFnStart(FSI, MF.getFunction())) {
- if (OptLevel != CodeGenOpt::None)
- // FIXME: Debugging informaation for inlined function is only
- // supported at CodeGenOpt::Node.
- return 0;
-
- DebugLoc PrevLoc = CurDebugLoc;
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- if (PrevLoc.isUnknown())
- return 0;
-
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI.getSubprogram());
- DICompileUnit CU(PrevLocTpl.Scope);
- unsigned LabelID = DW->RecordInlinedFnStart(SP, CU,
- PrevLocTpl.Line,
- PrevLocTpl.Col);
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI.getSubprogram());
- return 0;
- }
case Intrinsic::dbg_declare: {
if (OptLevel != CodeGenOpt::None)
// FIXME: Variable debug info is not supported here.
@@ -3972,13 +3941,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
- if (MMI)
- MMI->setVariableDbgInfo(Variable, FI);
-#else
- DW->RecordVariable(Variable, FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI.getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
+ MMI->setVariableDbgInfo(Variable, FI, Dbg);
+ }
return 0;
}
case Intrinsic::eh_exception: {
@@ -4233,7 +4204,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
if (CI->getZExtValue() < 2)
- setValue(&I, DAG.getConstant(-1U, Ty));
+ setValue(&I, DAG.getConstant(-1ULL, Ty));
else
setValue(&I, DAG.getConstant(0, Ty));
return 0;
@@ -4355,6 +4326,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
case Intrinsic::atomic_swap:
return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
}
}
@@ -4368,7 +4349,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
/// TargetLowering::IsEligibleForTailCallOptimization.
///
static bool
-isInTailCallPosition(const Instruction *I, Attributes RetAttr,
+isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
const TargetLowering &TLI) {
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -4395,9 +4376,14 @@ isInTailCallPosition(const Instruction *I, Attributes RetAttr,
// what the call's return type is.
if (!Ret || Ret->getNumOperands() == 0) return true;
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
// Conservatively require the attributes of the call to match those of
- // the return.
- if (F->getAttributes().getRetAttributes() != RetAttr)
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
@@ -4431,15 +4417,52 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
unsigned BeginLabel = 0, EndLabel = 0;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
- unsigned j = 1;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<uint64_t, 4> Offsets;
+ getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI, &Offsets);
+
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+ SDValue DemoteStackSlot;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i, ++j) {
+ i != e; ++i) {
SDValue ArgNode = getValue(*i);
Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4475,7 +4498,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
isTailCall = false;
std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), CS.getType(),
+ TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
@@ -4489,6 +4512,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
"Null value expected with tail call!");
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
+ else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = OutVTs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
+ getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
+ &Values[0], NumValues));
+ }
// As a special case, a null chain means that a tail call has
// been emitted and the DAG root is already updated.
if (Result.second.getNode())
@@ -5229,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
OpInfo.CallOperand, StackSlot, NULL, 0);
@@ -5757,9 +5809,32 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDL->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI);
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
+ OutVTs, OutsFlags, DAG);
+ if (!FLI.CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
// Set up the incoming argument description vector.
- SmallVector<ISD::InputArg, 16> Ins;
unsigned Idx = 1;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I, ++Idx) {
@@ -5837,6 +5912,28 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// Set up the argument values.
unsigned i = 0;
Idx = 1;
+ if (!FLI.CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
+ VT, AssertOp);
+
+ MachineFunction& MF = SDL->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FLI.DemoteRegister = SRetReg;
+ NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++Idx) {
SmallVector<SDValue, 4> ArgValues;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index a0ec7aabd8a7..10f256c15306 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -90,6 +90,14 @@ public:
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
+ /// CanLowerReturn - true iff the function's return value can be lowered to
+ /// registers.
+ bool CanLowerReturn;
+
+ /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+ /// allocated to hold a pointer to the hidden sret parameter.
+ unsigned DemoteRegister;
+
explicit FunctionLoweringInfo(TargetLowering &TLI);
/// set - Initialize this FunctionLoweringInfo with the given Function
@@ -193,9 +201,9 @@ class SelectionDAGLowering {
Case() : Low(0), High(0), BB(0) { }
Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
Low(low), High(high), BB(bb) { }
- uint64_t size() const {
- uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
- uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
return (rHigh - rLow + 1ULL);
}
};
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b63d5bb2e6a1..ab5f21e4337c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -68,7 +68,7 @@ static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies",
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
cl::desc("Schedule copies of livein registers"),
cl::init(false));
@@ -387,13 +387,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDL->setCurDebugLoc(Loc);
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(I))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+ SDL->setCurDebugLoc(Loc);
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
}
if (!isa<TerminatorInst>(I))
SDL->visit(*I);
@@ -750,14 +751,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc,
- MF.getDebugLocInfo());
- FastIS->setCurDebugLoc(Loc);
- if (MF.getDefaultDebugLoc().isUnknown())
- MF.setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(BI))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc,
+ MF.getDebugLocInfo());
+ FastIS->setCurDebugLoc(Loc);
+ if (MF.getDefaultDebugLoc().isUnknown())
+ MF.setDefaultDebugLoc(Loc);
+ }
}
// Just before the terminator instruction, insert instructions to
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9f36b679f3b0..2ca52a48c2a9 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -22,7 +22,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -65,22 +64,27 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::SRA_I32] = "__ashrsi3";
Names[RTLIB::SRA_I64] = "__ashrdi3";
Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
Names[RTLIB::MUL_I16] = "__mulhi3";
Names[RTLIB::MUL_I32] = "__mulsi3";
Names[RTLIB::MUL_I64] = "__muldi3";
Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
Names[RTLIB::SDIV_I16] = "__divhi3";
Names[RTLIB::SDIV_I32] = "__divsi3";
Names[RTLIB::SDIV_I64] = "__divdi3";
Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
Names[RTLIB::UDIV_I16] = "__udivhi3";
Names[RTLIB::UDIV_I32] = "__udivsi3";
Names[RTLIB::UDIV_I64] = "__udivdi3";
Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
Names[RTLIB::SREM_I16] = "__modhi3";
Names[RTLIB::SREM_I32] = "__modsi3";
Names[RTLIB::SREM_I64] = "__moddi3";
Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
Names[RTLIB::UREM_I16] = "__umodhi3";
Names[RTLIB::UREM_I32] = "__umodsi3";
Names[RTLIB::UREM_I64] = "__umoddi3";
@@ -2360,7 +2364,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
- std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
// Figure out which register class contains this reg.
const TargetRegisterInfo *RI = TM.getRegisterInfo();
@@ -2383,7 +2387,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (StringsEqualNoCase(RegName, RI->getName(*I)))
+ if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b5d6b471f472..3909c56bdbb2 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -709,7 +709,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index f3ad0d1cc0a7..f85384bc5c77 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -13,15 +13,43 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
-std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry,
- IndexListEntry::tombstoneKeyEntry;
+
+// Yep - these are thread safe. See the header for details.
+namespace {
+
+
+ class EmptyIndexListEntry : public IndexListEntry {
+ public:
+ EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
+ };
+
+ class TombstoneIndexListEntry : public IndexListEntry {
+ public:
+ TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
+ };
+
+ // The following statics are thread safe. They're read only, and you
+ // can't step from them to any other list entries.
+ ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
+ ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
+}
char SlotIndexes::ID = 0;
static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
+ return &*IndexListEntryEmptyKey;
+}
+
+IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
+ return &*IndexListEntryTombstoneKey;
+}
+
+
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(au);
@@ -51,8 +79,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
mf = &fn;
initList();
- const unsigned gap = 1;
-
// Check that the list contains only the sentinal.
assert(indexListHead->getNext() == 0 &&
"Index list non-empty at initial numbering?");
@@ -64,14 +90,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
"MachineInstr -> Index mapping non-empty at initial numbering?");
functionSize = 0;
- /*
- for (unsigned s = 0; s < SlotIndex::NUM; ++s) {
- indexList.push_back(createEntry(0, s));
- }
-
- unsigned index = gap * SlotIndex::NUM;
- */
-
unsigned index = 0;
// Iterate over the the function.
@@ -83,7 +101,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -93,7 +111,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
// Insert a store index for the instr.
@@ -109,14 +127,14 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
if (Slots == 0)
Slots = 1;
- index += (Slots + 1) * gap * SlotIndex::NUM;
+ index += (Slots + 1) * SlotIndex::NUM;
}
if (mbb->getFirstTerminator() == mbb->end()) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
SlotIndex blockEndIndex(back(), SlotIndex::STORE);
@@ -138,21 +156,36 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
-void SlotIndexes::renumber() {
- assert(false && "SlotIndexes::runmuber is not fully implemented yet.");
+void SlotIndexes::renumberIndexes() {
- // Compute numbering as follows:
- // Grab an iterator to the start of the index list.
- // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
- // iterator in lock-step (though skipping it over indexes which have
- // null pointers in the instruction field).
- // At each iteration assert that the instruction pointed to in the index
- // is the same one pointed to by the MI iterator. This
+ // Renumber updates the index of every element of the index list.
+ // If all instrs in the function have been allocated an index (which has been
+ // placed in the index list in the order of instruction iteration) then the
+ // resulting numbering will match what would have been generated by the
+ // pass during the initial numbering of the function if the new instructions
+ // had been present.
- // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
- // only need to be set up once - when the first numbering is computed.
+ functionSize = 0;
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
- assert(false && "Renumbering not supported yet.");
+ curEntry->setIndex(index);
+
+ if (curEntry->getInstr() == 0) {
+ // MBB start entry or terminator gap. Just step index by SlotIndex::NUM.
+ index += SlotIndex::NUM;
+ }
+ else {
+ ++functionSize;
+ unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * SlotIndex::NUM;
+ }
+ }
}
void SlotIndexes::dump() const {
@@ -167,7 +200,7 @@ void SlotIndexes::dump() const {
}
}
- for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin();
+ for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
itr != mbb2IdxMap.end(); ++itr) {
errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
<< itr->second.first << ", " << itr->second.second << "]\n";
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 95e85be5b817..910732538e97 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -52,16 +52,16 @@ protected:
/// Ensures there is space before the given machine instruction, returns the
/// instruction's new number.
SlotIndex makeSpaceBefore(MachineInstr *mi) {
- if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
//lis->scaleNumbering(2);
//ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapBeforeInstr(miIdx));
+ //assert(lis->hasGapBeforeInstr(miIdx));
return miIdx;
}
@@ -69,16 +69,16 @@ protected:
/// Ensure there is space after the given machine instruction, returns the
/// instruction's new number.
SlotIndex makeSpaceAfter(MachineInstr *mi) {
- if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
// lis->scaleNumbering(2);
// ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapAfterInstr(miIdx));
+ //assert(lis->hasGapAfterInstr(miIdx));
return miIdx;
}
@@ -99,14 +99,8 @@ protected:
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(next(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
/// Insert a store of the given vreg to the given stack slot immediately
@@ -120,14 +114,8 @@ protected:
tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
MachineBasicBlock::iterator storeInstItr(prior(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
void insertStoreAfterInstOnInterval(LiveInterval *li,
@@ -164,14 +152,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(next(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
-
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
/// Insert a load of the given vreg from the given stack slot immediately
@@ -186,14 +168,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(prior(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Load inst index already in use.");
-
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
void insertLoadBeforeInstOnInterval(LiveInterval *li,
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index c646869e8a73..102e2a34a97e 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -135,14 +135,52 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MachineOperand &MO = MI->getOperand(0);
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ MO.setReg(DestReg);
+ MO.setSubReg(SubIdx);
+ } else if (SubIdx) {
+ MO.setReg(TRI->getSubReg(DestReg, SubIdx));
+ } else {
+ MO.setReg(DestReg);
+ }
MBB.insert(I, MI);
}
+bool
+TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const {
+ if (MI->getOpcode() != Other->getOpcode() ||
+ MI->getNumOperands() != Other->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(OMO.isReg() && OMO.isDef());
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Reg != OMO.getReg())
+ return false;
+ } else if (MRI->getRegClass(MO.getReg()) !=
+ MRI->getRegClass(OMO.getReg()))
+ return false;
+
+ continue;
+ }
+
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+
+ return true;
+}
+
unsigned
TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
unsigned FnSize = 0;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0a6a0d745496..84467ed36d54 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1033,7 +1033,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI);
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
ReMatRegs.set(regB);
++NumReMats;
} else {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ce3eed17c723..c8c5d861578d 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -117,8 +117,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
@@ -161,8 +161,8 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index fd80f460992b..ec0abd137d6d 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI,
}
/// InvalidateRegDef - If the def operand of the specified def MI is now dead
-/// (since it's spill instruction is removed), mark it isDead. Also checks if
+/// (since its spill instruction is removed), mark it isDead. Also checks if
/// the def MI has other definition operands that are not dead. Returns it by
/// reference.
static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef) {
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
// Due to remat, it's possible this reg isn't being reused. That is,
// the def of this reg (by prev MI) is now dead.
MachineInstr *DefMI = I;
MachineOperand *DefOp = NULL;
for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
continue;
if (MO.getReg() == Reg)
DefOp = &MO;
@@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr *NMI = I;
for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != Reg)
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
continue;
if (MO.isUse())
FoundUse = true;
@@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
-
- if (!MI.isRegTiedToDefOperand(i))
- // Unless it's a two-address operand, this is the new kill.
- MO.setIsKill();
+ } else {
+ // Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
+ KillOps[SReg]->setIsKill(false);
+ unsigned KReg = KillOps[SReg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
+ KillOps[*SSR] = NULL;
+ RegKills.reset(*SSR);
+ }
+ }
+ }
}
+
if (MO.isKill()) {
RegKills.set(Reg);
KillOps[Reg] = &MO;
@@ -573,7 +594,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
RegKills.reset(Reg);
@@ -583,6 +604,10 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
RegKills.reset(*SR);
KillOps[*SR] = NULL;
}
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
}
}
@@ -601,7 +626,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
"Don't know how to remat instructions that define > 1 values!");
#endif
TII->reMaterialize(MBB, MII, DestReg,
- ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI);
+ ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -816,11 +841,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
"A reuse cannot be a virtual register");
if (PRRU != RealPhysRegUsed) {
// What was the sub-register index?
- unsigned SubReg;
- for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++)
- if (SubReg == RealPhysRegUsed)
- break;
- assert(SubReg == RealPhysRegUsed &&
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
"Operand physreg is not a sub-register of PhysRegUsed");
}
@@ -1454,7 +1476,7 @@ private:
// being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
@@ -1704,6 +1726,7 @@ private:
// Mark is killed.
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
@@ -1984,6 +2007,7 @@ private:
TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
// This invalidates DesignatedReg.
@@ -2112,6 +2136,7 @@ private:
// virtual or needing to clobber any values if it's physical).
NextMII = &MI;
--NextMII; // backtrack to the copy.
+ NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
// Propagate the sub-register index over.
if (SubIdx) {
DefMO = NextMII->findRegisterDefOperand(DestReg);