diff options
author | Roman Divacky <rdivacky@FreeBSD.org> | 2009-11-18 14:58:34 +0000 |
---|---|---|
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2009-11-18 14:58:34 +0000 |
commit | 907da171cc911d701da02a5cab898a9c49dd7724 (patch) | |
tree | 6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/CodeGen | |
parent | 72cc50852bec44580ee7efe1aa2076273008a6ae (diff) | |
download | src-test2-907da171cc911d701da02a5cab898a9c49dd7724.tar.gz src-test2-907da171cc911d701da02a5cab898a9c49dd7724.zip |
Notes
Diffstat (limited to 'lib/CodeGen')
55 files changed, 2644 insertions, 1864 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 86d051c102f8..c37c793b56d0 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -54,10 +54,13 @@ unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) return Node; } -void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs) +void AggressiveAntiDepState::GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) { for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { - if (GetGroup(Reg) == Group) + if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) Regs.push_back(Reg); } } @@ -99,12 +102,28 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) AggressiveAntiDepBreaker:: -AggressiveAntiDepBreaker(MachineFunction& MFi) : +AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL), SavedState(NULL) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(errs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + errs() << " " << TRI->getName(r)); + DEBUG(errs() << '\n'); } AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { @@ -264,16 +283,18 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, } } -/// AntiDepPathStep - Return SUnit that SU has an anti-dependence on. -static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs, - std::vector<SDep*>& Edges) { +/// AntiDepEdges - Return in Edges the anti- and output- +/// dependencies on Regs in SU that we want to consider for breaking. +static void AntiDepEdges(SUnit *SU, + const AntiDepBreaker::AntiDepRegVector& Regs, + std::vector<SDep*>& Edges) { AntiDepBreaker::AntiDepRegSet RegSet; for (unsigned i = 0, e = Regs.size(); i < e; ++i) RegSet.insert(Regs[i]); for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { - if (P->getKind() == SDep::Anti) { + if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); if (RegSet.count(Reg) != 0) { Edges.push_back(&*P); @@ -285,6 +306,31 @@ static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs, assert(RegSet.empty() && "Expected all antidep registers to be found"); } +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static SUnit *CriticalPathStep(SUnit *SU) { + SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + if (SU != 0) { + for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + } + + return (Next) ? Next->getSUnit() : 0; +} + void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag) { unsigned *KillIndices = State->GetKillIndices(); @@ -499,11 +545,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); - // Collect all registers in the same group as AntiDepReg. These all - // need to be renamed together if we are to break the - // anti-dependence. + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. std::vector<unsigned> Regs; - State->GetGroupRegs(AntiDepGroupIndex, Regs); + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); assert(Regs.size() > 0 && "Empty register group!"); if (Regs.size() == 0) return false; @@ -544,9 +590,10 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } // FIXME: for now just handle single register in group case... - // FIXME: check only regs that have references... - if (Regs.size() > 1) + if (Regs.size() > 1) { + DEBUG(errs() << "\tMultiple rename registers in group\n"); return false; + } // Check each possible rename register for SuperReg in round-robin // order. If that register is available, and the corresponding @@ -630,12 +677,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); - // Nothing to do if no candidates. - if (Candidates.empty()) { - DEBUG(errs() << "\n===== No anti-dependency candidates\n"); - return 0; - } - // The code below assumes that there is at least one instruction, // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; @@ -655,16 +696,37 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // ...need a map from MI to SUnit. std::map<MachineInstr *, SUnit *> MISUnitMap; - - DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << - " anti-dependencies\n"); for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { SUnit *SU = &SUnits[i]; MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU)); } + // Track progress along the critical path through the SUnit graph as + // we walk the instructions. This is needed for regclasses that only + // break critical-path anti-dependencies. + SUnit *CriticalPathSU = 0; + MachineInstr *CriticalPathMI = 0; + if (CriticalPathSet.any()) { + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + SUnit *SU = &SUnits[i]; + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > + (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { + CriticalPathSU = SU; + } + } + + CriticalPathMI = CriticalPathSU->getInstr(); + } + + // Even if there are no anti-dependencies we still need to go + // through the instructions to update Def, Kills, etc. #ifndef NDEBUG - { + if (Candidates.empty()) { + DEBUG(errs() << "\n===== No anti-dependency candidates\n"); + } else { + DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << + " anti-dependencies\n"); DEBUG(errs() << "Available regs:"); for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { if (!State->IsLive(Reg)) @@ -691,14 +753,26 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Process the defs in MI... PrescanInstruction(MI, Count, PassthruRegs); - + + // The the dependence edges that represent anti- and output- + // dependencies that are candidates for breaking. std::vector<SDep*> Edges; SUnit *PathSU = MISUnitMap[MI]; AntiDepBreaker::CandidateMap::iterator citer = Candidates.find(PathSU); if (citer != Candidates.end()) - AntiDepPathStep(PathSU, citer->second, Edges); - + AntiDepEdges(PathSU, citer->second, Edges); + + // If MI is not on the critical path, then we don't rename + // registers in the CriticalPathSet. + BitVector *ExcludeRegs = NULL; + if (MI == CriticalPathMI) { + CriticalPathSU = CriticalPathStep(CriticalPathSU); + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; + } else { + ExcludeRegs = &CriticalPathSet; + } + // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) if (MI->getOpcode() != TargetInstrInfo::KILL) { @@ -707,7 +781,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( SDep *Edge = Edges[i]; SUnit *NextSU = Edge->getSUnit(); - if (Edge->getKind() != SDep::Anti) continue; + if ((Edge->getKind() != SDep::Anti) && + (Edge->getKind() != SDep::Output)) continue; unsigned AntiDepReg = Edge->getReg(); DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); @@ -717,6 +792,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Don't break anti-dependencies on non-allocatable registers. DEBUG(errs() << " (non-allocatable)\n"); continue; + } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + // Don't break anti-dependencies for critical path registers + // if not on the critical path + DEBUG(errs() << " (not critical-path)\n"); + continue; } else if (PassthruRegs.count(AntiDepReg) != 0) { // If the anti-dep register liveness "passes-thru", then // don't try to change it. It will be changed along with diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index c5121682bd63..e5c9a7bb3adf 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" @@ -85,8 +86,11 @@ namespace llvm { unsigned GetGroup(unsigned Reg); // GetGroupRegs - Return a vector of the registers belonging to a - // group. - void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs); + // group. If RegRefs is non-NULL then only included referenced registers. + void GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs); // UnionGroups - Union Reg1's and Reg2's groups to form a new // group. Return the index of the GroupNode representing the @@ -114,6 +118,10 @@ namespace llvm { /// because they may not be safe to break. const BitVector AllocatableSet; + /// CriticalPathSet - The set of registers that should only be + /// renamed if they are on the critical path. + BitVector CriticalPathSet; + /// State - The state used to identify and rename anti-dependence /// registers. AggressiveAntiDepState *State; @@ -124,7 +132,8 @@ namespace llvm { AggressiveAntiDepState *SavedState; public: - AggressiveAntiDepBreaker(MachineFunction& MFi); + AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); /// GetMaxTrials - As anti-dependencies are broken, additional diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 277508767e1c..b614f687a462 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -23,6 +23,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include <map> namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bb6bd957f063..08e0eae16c35 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -35,6 +36,7 @@ #include "llvm/Support/Mangler.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" @@ -512,7 +514,7 @@ void AsmPrinter::EmitXXStructorList(Constant *List) { //===----------------------------------------------------------------------===// /// LEB 128 number encoding. -/// PrintULEB128 - Print a series of hexidecimal values (separated by commas) +/// PrintULEB128 - Print a series of hexadecimal values (separated by commas) /// representing an unsigned leb128 value. void AsmPrinter::PrintULEB128(unsigned Value) const { char Buffer[20]; @@ -525,7 +527,7 @@ void AsmPrinter::PrintULEB128(unsigned Value) const { } while (Value); } -/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas) +/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas) /// representing a signed leb128 value. void AsmPrinter::PrintSLEB128(int Value) const { int Sign = Value >> (8 * sizeof(Value) - 1); @@ -546,7 +548,7 @@ void AsmPrinter::PrintSLEB128(int Value) const { // Emission and print routines // -/// PrintHex - Print a value as a hexidecimal value. +/// PrintHex - Print a value as a hexadecimal value. /// void AsmPrinter::PrintHex(int Value) const { char Buffer[20]; @@ -727,7 +729,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) { /// Special characters are emitted properly. /// \literal (Eg. '\t') \endliteral void AsmPrinter::EmitString(const std::string &String) const { - EmitString(String.c_str(), String.size()); + EmitString(String.data(), String.size()); } void AsmPrinter::EmitString(const char *String, unsigned Size) const { @@ -1357,32 +1359,31 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { /// instruction's DebugLoc. void AsmPrinter::processDebugLoc(const MachineInstr *MI, bool BeforePrintingInsn) { - if (!MAI || !DW) + if (!MAI || !DW || !MAI->doesSupportDebugInformation() + || !DW->ShouldEmitDwarfDebug()) return; DebugLoc DL = MI->getDebugLoc(); - if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { - if (!DL.isUnknown()) { - DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - if (BeforePrintingInsn) { - if (CurDLT.Scope != 0 && PrevDLT != CurDLT) { - unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - CurDLT.Scope); - printLabel(L); - O << '\n'; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->SetDbgScopeBeginLabels(MI, L); -#endif - } else { -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->SetDbgScopeEndLabels(MI, 0); -#endif - } - } + if (DL.isUnknown()) + return; + DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); + if (CurDLT.Scope == 0) + return; + + if (BeforePrintingInsn) { + if (CurDLT != PrevDLT) { + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.Scope); + printLabel(L); + DW->BeginScope(MI, L); PrevDLT = CurDLT; } + } else { + // After printing instruction + DW->EndScope(MI); } } + /// printInlineAsm - This method formats and prints the specified machine /// instruction that is an inline asm. void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { @@ -1399,6 +1400,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + O << '\t'; + // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { @@ -1636,13 +1639,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, assert(BB->hasName() && "Address of anonymous basic block not supported yet!"); - // FIXME: This isn't guaranteed to produce a unique name even if the - // block and function have a name. - std::string Mangled = - Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(), - /*ForcePrivate=*/true); + // This code must use the function name itself, and not the function number, + // since it must be possible to generate the label name from within other + // functions. + std::string FuncName = Mang->getMangledName(F); + + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA" + << FuncName.size() << '_' << FuncName << '_' + << Mang->makeNameProper(BB->getName()); - return OutContext.GetOrCreateSymbol(StringRef(Mangled)); + return OutContext.GetOrCreateSymbol(Name.str()); } MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { @@ -1817,21 +1824,80 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { /// EmitComments - Pretty-print comments for instructions void AsmPrinter::EmitComments(const MachineInstr &MI) const { - assert(VerboseAsm && !MI.getDebugLoc().isUnknown()); - - DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + if (!VerboseAsm) + return; - // Print source line info. - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " SrcLine "; - if (DLT.Scope) { - DICompileUnit CU(DLT.Scope); - if (!CU.isNull()) - O << CU.getFilename() << " "; + bool Newline = false; + + if (!MI.getDebugLoc().isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + + // Print source line info. + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " SrcLine "; + if (DLT.Scope) { + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + O << CU.getFilename() << " "; + } + O << DLT.Line; + if (DLT.Col != 0) + O << ":" << DLT.Col; + Newline = true; + } + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = + MI.getParent()->getParent()->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. + if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Reload"; + Newline = true; + } + } + else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Folded Reload"; + Newline = true; + } + } + else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Spill"; + Newline = true; + } + } + else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Folded Spill"; + Newline = true; + } + } + + // Check for spill-induced copies + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg, + SrcSubIdx, DstSubIdx)) { + if (MI.getAsmPrinterFlag(ReloadReuse)) { + if (Newline) O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Reload Reuse"; + Newline = true; + } } - O << DLT.Line; - if (DLT.Col != 0) - O << ":" << DLT.Col; } /// PrintChildLoopComment - Print comments about child loops within @@ -1862,8 +1928,7 @@ static void PrintChildLoopComment(formatted_raw_ostream &O, } /// EmitComments - Pretty-print comments for basic blocks -void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const -{ +void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const { if (VerboseAsm) { // Add loop depth information const MachineLoop *loop = LI->getLoopFor(&MBB); diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 62b51ecd18ac..3e50a15e162d 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -29,7 +29,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a /// Dwarf abbreviation. - class VISIBILITY_HIDDEN DIEAbbrevData { + class DIEAbbrevData { /// Attribute - Dwarf attribute code. /// unsigned Attribute; @@ -52,7 +52,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug /// information object. - class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode { + class DIEAbbrev : public FoldingSetNode { /// Tag - Dwarf tag code. /// unsigned Tag; @@ -113,7 +113,7 @@ namespace llvm { class CompileUnit; class DIEValue; - class VISIBILITY_HIDDEN DIE : public FoldingSetNode { + class DIE : public FoldingSetNode { protected: /// Abbrev - Buffer for constructing abbreviation. /// @@ -202,7 +202,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEValue - A debug information entry value. /// - class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode { + class DIEValue : public FoldingSetNode { public: enum { isInteger, @@ -249,7 +249,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEInteger - An integer value DIE. /// - class VISIBILITY_HIDDEN DIEInteger : public DIEValue { + class DIEInteger : public DIEValue { uint64_t Integer; public: explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} @@ -294,7 +294,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEString - A string value DIE. /// - class VISIBILITY_HIDDEN DIEString : public DIEValue { + class DIEString : public DIEValue { const std::string Str; public: explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {} @@ -326,7 +326,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEDwarfLabel - A Dwarf internal label expression DIE. // - class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue { + class DIEDwarfLabel : public DIEValue { const DWLabel Label; public: explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {} @@ -356,7 +356,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEObjectLabel - A label to an object in code or data. // - class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue { + class DIEObjectLabel : public DIEValue { const std::string Label; public: explicit DIEObjectLabel(const std::string &L) @@ -389,7 +389,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIESectionOffset - A section offset DIE. /// - class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue { + class DIESectionOffset : public DIEValue { const DWLabel Label; const DWLabel Section; bool IsEH : 1; @@ -428,7 +428,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEDelta - A simple label difference DIE. /// - class VISIBILITY_HIDDEN DIEDelta : public DIEValue { + class DIEDelta : public DIEValue { const DWLabel LabelHi; const DWLabel LabelLo; public: @@ -462,7 +462,7 @@ namespace llvm { /// DIEntry - A pointer to another debug information entry. An instance of /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) - class VISIBILITY_HIDDEN DIEEntry : public DIEValue { + class DIEEntry : public DIEValue { DIE *Entry; public: explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {} @@ -497,7 +497,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEBlock - A block of values. Primarily used for location expressions. // - class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE { + class DIEBlock : public DIEValue, public DIE { unsigned Size; // Size in bytes excluding size header. public: DIEBlock() diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1372fc21685f..c62c43545c46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -48,7 +48,7 @@ namespace llvm { //===----------------------------------------------------------------------===// /// CompileUnit - This dwarf writer support class manages information associate /// with a source file. -class VISIBILITY_HIDDEN CompileUnit { +class CompileUnit { /// ID - File identifier for source. /// unsigned ID; @@ -127,61 +127,66 @@ public: class DbgVariable { DIVariable Var; // Variable Descriptor. unsigned FrameIndex; // Variable frame index. - bool InlinedFnVar; // Variable for an inlined function. + DbgVariable *AbstractVar; // Abstract variable for this variable. + DIE *TheDIE; public: - DbgVariable(DIVariable V, unsigned I, bool IFV) - : Var(V), FrameIndex(I), InlinedFnVar(IFV) {} + DbgVariable(DIVariable V, unsigned I) + : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {} // Accessors. - DIVariable getVariable() const { return Var; } - unsigned getFrameIndex() const { return FrameIndex; } - bool isInlinedFnVar() const { return InlinedFnVar; } + DIVariable getVariable() const { return Var; } + unsigned getFrameIndex() const { return FrameIndex; } + void setAbstractVariable(DbgVariable *V) { AbstractVar = V; } + DbgVariable *getAbstractVariable() const { return AbstractVar; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } }; //===----------------------------------------------------------------------===// /// DbgScope - This class is used to track scope information. /// -class DbgConcreteScope; class DbgScope { DbgScope *Parent; // Parent to this scope. - DIDescriptor Desc; // Debug info descriptor for scope. - // FIXME use WeakVH for Desc. - WeakVH InlinedAt; // If this scope represents inlined - // function body then this is the location - // where this body is inlined. + DIDescriptor Desc; // Debug info descriptor for scope. + WeakVH InlinedAtLocation; // Location at which scope is inlined. + bool AbstractScope; // Abstract Scope unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. const MachineInstr *LastInsn; // Last instruction of this scope. const MachineInstr *FirstInsn; // First instruction of this scope. SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope. SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope. - SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs. // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) - : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0), + : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), + StartLabelID(0), EndLabelID(0), LastInsn(0), FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. DbgScope *getParent() const { return Parent; } + void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } - MDNode *getInlinedAt() const { - return dyn_cast_or_null<MDNode>(InlinedAt); + MDNode *getInlinedAt() const { + return dyn_cast_or_null<MDNode>(InlinedAtLocation); } + MDNode *getScopeNode() const { return Desc.getNode(); } unsigned getStartLabelID() const { return StartLabelID; } unsigned getEndLabelID() const { return EndLabelID; } SmallVector<DbgScope *, 4> &getScopes() { return Scopes; } SmallVector<DbgVariable *, 8> &getVariables() { return Variables; } - SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; } void setStartLabelID(unsigned S) { StartLabelID = S; } void setEndLabelID(unsigned E) { EndLabelID = E; } void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } const MachineInstr *getLastInsn() { return LastInsn; } void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + void setAbstractScope() { AbstractScope = true; } + bool isAbstractScope() const { return AbstractScope; } const MachineInstr *getFirstInsn() { return FirstInsn; } + /// AddScope - Add a scope to the scope. /// void AddScope(DbgScope *S) { Scopes.push_back(S); } @@ -190,10 +195,6 @@ public: /// void AddVariable(DbgVariable *V) { Variables.push_back(V); } - /// AddConcreteInst - Add a concrete instance to the scope. - /// - void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } - void FixInstructionMarkers() { assert (getFirstInsn() && "First instruction is missing!"); if (getLastInsn()) @@ -218,11 +219,15 @@ public: void DbgScope::dump() const { raw_ostream &err = errs(); err.indent(IndentLevel); - Desc.dump(); + MDNode *N = Desc.getNode(); + N->dump(); err << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + if (AbstractScope) + err << "Abstract Scope\n"; IndentLevel += 2; - + if (!Scopes.empty()) + err << "Children ...\n"; for (unsigned i = 0, e = Scopes.size(); i != e; ++i) if (Scopes[i] != this) Scopes[i]->dump(); @@ -235,7 +240,7 @@ void DbgScope::dump() const { /// DbgConcreteScope - This class is used to track a scope that holds concrete /// instance information. /// -class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope { +class DbgConcreteScope : public DbgScope { CompileUnit *Unit; DIE *Die; // Debug info for this concrete scope. public: @@ -251,8 +256,6 @@ DbgScope::~DbgScope() { delete Scopes[i]; for (unsigned j = 0, M = Variables.size(); j < M; ++j) delete Variables[j]; - for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k) - delete ConcreteInsts[k]; } } // end llvm namespace @@ -262,7 +265,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionSourceLines(), didInitial(false), shouldEmit(false), - FunctionDbgScope(0), DebugTimer(0) { + CurrentFnDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) DebugTimer = new Timer("Dwarf Debug Writer", getDwarfTimerGroup()); @@ -271,11 +274,6 @@ DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = Values.size(); j < M; ++j) delete Values[j]; - for (DenseMap<const MDNode *, DbgScope *>::iterator - I = AbstractInstanceRootMap.begin(), - E = AbstractInstanceRootMap.end(); I != E;++I) - delete I->second; - delete DebugTimer; } @@ -1097,6 +1095,10 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { /// CreateGlobalVariableDIE - Create new DIE using GV. DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { + // If the global variable was optmized out then no need to create debug info entry. + if (!GV.getGlobal()) return NULL; + if (!GV.getDisplayName()) return NULL; + DIE *GVDie = new DIE(dwarf::DW_TAG_variable); AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, GV.getDisplayName()); @@ -1233,9 +1235,6 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, } } - if (!SP.isLocalToUnit() && !IsInlined) - AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // DW_TAG_inlined_subroutine may refer to this DIE. DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); Slot = SPDie; @@ -1283,263 +1282,341 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { AddSourceLine(VariableDie, &VD); // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead. + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. if (VD.isBlockByrefVariable()) AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); else AddType(Unit, VariableDie, VD.getType()); // Add variable address. - if (!DV->isInlinedFnVar()) { - // Variables for abstract instances of inlined functions don't get a - // location. - MachineLocation Location; - Location.set(RI->getFrameRegister(*MF), - RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - - - if (VD.hasComplexAddress()) - AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - AddAddress(VariableDie, dwarf::DW_AT_location, Location); - } + // Variables for abstract instances of inlined functions don't get a + // location. + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); return VariableDie; } -/// getOrCreateScope - Returns the scope associated with the given descriptor. -/// -DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI, - MDNode *InlinedAt) { - ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N); - if (VI != DbgScopeMap.end()) - return VI->second; +/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction. +/// Initialize scope and update scope hierarchy. +DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, + MDNode *InlinedAt) { + assert (N && "Invalid Scope encoding!"); + assert (MI && "Missing machine instruction!"); + bool GetConcreteScope = (MI && InlinedAt); - DbgScope *Parent = NULL; + DbgScope *NScope = NULL; + + if (InlinedAt) + NScope = DbgScopeMap.lookup(InlinedAt); + else + NScope = DbgScopeMap.lookup(N); + assert (NScope && "Unable to find working scope!"); + + if (NScope->getFirstInsn()) + return NScope; - if (InlinedAt) { + DbgScope *Parent = NULL; + if (GetConcreteScope) { DILocation IL(InlinedAt); - assert (!IL.isNull() && "Invalid InlindAt location!"); - ValueMap<MDNode *, DbgScope *>::iterator DSI = - DbgScopeMap.find(IL.getScope().getNode()); - assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!"); - Parent = DSI->second; - } else { - DIDescriptor Scope(N); - if (Scope.isCompileUnit()) { - return NULL; - } else if (Scope.isSubprogram()) { - DISubprogram SP(N); - DIDescriptor ParentDesc = SP.getContext(); - if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit()) - Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); - DIDescriptor ParentDesc = DB.getContext(); - if (!ParentDesc.isNull()) - Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt); - } else - assert (0 && "Unexpected scope info"); - } - - DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt); + Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, + IL.getOrigLocation().getNode()); + assert (Parent && "Unable to find Parent scope!"); + NScope->setParent(Parent); + Parent->AddScope(NScope); + } else if (DIDescriptor(N).isLexicalBlock()) { + DILexicalBlock DB(N); + if (!DB.getContext().isNull()) { + Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); + NScope->setParent(Parent); + Parent->AddScope(NScope); + } + } + NScope->setFirstInsn(MI); - if (Parent) - Parent->AddScope(NScope); - else - // First function is top level function. - if (!FunctionDbgScope) - FunctionDbgScope = NScope; + if (!Parent && !InlinedAt) { + StringRef SPName = DISubprogram(N).getLinkageName(); + if (SPName == MF->getFunction()->getName()) + CurrentFnDbgScope = NScope; + } + + if (GetConcreteScope) { + ConcreteScopes[InlinedAt] = NScope; + getOrCreateAbstractScope(N); + } - DbgScopeMap.insert(std::make_pair(N, NScope)); return NScope; } +DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { + assert (N && "Invalid Scope encoding!"); -/// getOrCreateScope - Returns the scope associated with the given descriptor. -/// FIXME - Remove this method. -DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) { - DbgScope *&Slot = DbgScopeMap[N]; - if (Slot) return Slot; - + DbgScope *AScope = AbstractScopes.lookup(N); + if (AScope) + return AScope; + DbgScope *Parent = NULL; - DILexicalBlock Block(N); - // Don't create a new scope if we already created one for an inlined function. - DenseMap<const MDNode *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(N); - if (II != AbstractInstanceRootMap.end()) - return LexicalScopeStack.back(); - - if (!Block.isNull()) { - DIDescriptor ParentDesc = Block.getContext(); - Parent = - ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode()); + DIDescriptor Scope(N); + if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + if (!ParentDesc.isNull()) + Parent = getOrCreateAbstractScope(ParentDesc.getNode()); } - Slot = new DbgScope(Parent, DIDescriptor(N)); + AScope = new DbgScope(Parent, DIDescriptor(N), NULL); if (Parent) - Parent->AddScope(Slot); - else - // First function is top level function. - FunctionDbgScope = Slot; + Parent->AddScope(AScope); + AScope->setAbstractScope(); + AbstractScopes[N] = AScope; + if (DIDescriptor(N).isSubprogram()) + AbstractScopesList.push_back(AScope); + return AScope; +} + +static DISubprogram getDISubprogram(MDNode *N) { - return Slot; + DIDescriptor D(N); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(N); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(N).getContext().getNode()); + + llvm_unreachable("Unexpected Descriptor!"); } -/// ConstructDbgScope - Construct the components of a scope. -/// -void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope, - unsigned ParentStartID, - unsigned ParentEndID, - DIE *ParentDie, CompileUnit *Unit) { - // Add variables to scope. - SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables(); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit); - if (VariableDie) ParentDie->AddChild(VariableDie); - } +DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) { + + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode); + assert (SPDie && "Unable to find subprogram DIE!"); + AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("func_begin", SubprogramCount)); + AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("func_end", SubprogramCount)); + MachineLocation Location(RI->getFrameRegister(*MF)); + AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + + if (!DISubprogram(SPNode).isLocalToUnit()) + AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + + // If there are global variables at this scope then add their dies. + for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null<MDNode>(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == SPNode) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + if (ScopedGVDie) + SPDie->AddChild(ScopedGVDie); + } + } + return SPDie; +} + +DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) { + unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); + unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + + // Ignore empty scopes. + if (StartID == EndID && StartID != 0) + return NULL; - // Add concrete instances to scope. - SmallVector<DbgConcreteScope *, 8> &ConcreteInsts = - ParentScope->getConcreteInsts(); - for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) { - DbgConcreteScope *ConcreteInst = ConcreteInsts[i]; - DIE *Die = ConcreteInst->getDie(); + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; - unsigned StartID = ConcreteInst->getStartLabelID(); - unsigned EndID = ConcreteInst->getEndLabelID(); + AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + StartID ? + DWLabel("label", StartID) + : DWLabel("func_begin", SubprogramCount)); + AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + EndID ? + DWLabel("label", EndID) + : DWLabel("func_end", SubprogramCount)); - // Add the scope bounds. - if (StartID) - AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("label", StartID)); - else - AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - if (EndID) - AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("label", EndID)); - else - AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - ParentDie->AddChild(Die); - } + return ScopeDIE; +} - // Add nested scopes. - SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { - // Define the Scope debug information entry. - DbgScope *Scope = Scopes[j]; +DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { + unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); + unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + assert (StartID && "Invalid starting label for an inlined scope!"); + assert (EndID && "Invalid end label for an inlined scope!"); + // Ignore empty scopes. + if (StartID == EndID && StartID != 0) + return NULL; - unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); - unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); + DIScope DS(Scope->getScopeNode()); + if (DS.isNull()) + return NULL; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - // Ignore empty scopes. - if (StartID == EndID && StartID != 0) continue; + DISubprogram InlinedSP = getDISubprogram(DS.getNode()); + DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + assert (OriginDIE && "Unable to find Origin DIE!"); + AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - // Do not ignore inlined scopes even if they don't have any variables or - // scopes. - if (Scope->getScopes().empty() && Scope->getVariables().empty() && - Scope->getConcreteInsts().empty()) - continue; + AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + DWLabel("label", StartID)); + AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + DWLabel("label", EndID)); - if (StartID == ParentStartID && EndID == ParentEndID) { - // Just add stuff to the parent scope. - ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit); - } else { - DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block); + InlinedSubprogramDIEs.insert(OriginDIE); - // Add the scope bounds. - if (StartID) - AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("label", StartID)); - else - AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); + // Track the start label for this inlined function. + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + I = InlineInfo.find(InlinedSP.getNode()); - if (EndID) - AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("label", EndID)); - else - AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); + if (I == InlineInfo.end()) { + InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE)); + InlinedSPNodes.push_back(InlinedSP.getNode()); + } else + I->second.push_back(std::make_pair(StartID, ScopeDIE)); - // Add the scope's contents. - ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit); - ParentDie->AddChild(ScopeDie); - } - } + StringPool.insert(InlinedSP.getName()); + StringPool.insert(InlinedSP.getLinkageName()); + DILocation DL(Scope->getInlinedAt()); + AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); + AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + + return ScopeDIE; } -/// ConstructFunctionDbgScope - Construct the scope for the subprogram. -/// -void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, - bool AbstractScope) { - // Exit if there is no root scope. - if (!RootScope) return; - DIDescriptor Desc = RootScope->getDesc(); - if (Desc.isNull()) - return; +DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV, + DbgScope *Scope, CompileUnit *Unit) { + // Get the descriptor. + const DIVariable &VD = DV->getVariable(); + const char *Name = VD.getName(); + if (!Name) + return NULL; - // Get the subprogram debug information entry. - DISubprogram SPD(Desc.getNode()); - - // Get the subprogram die. - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); - if (!SPDie) { - ConstructSubprogram(SPD.getNode()); - SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); - } - assert(SPDie && "Missing subprogram descriptor"); - - if (!AbstractScope) { - // Add the function bounds. - AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); - } - - ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); - // If there are global variables at this scope then add their dies. - for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), - SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { - MDNode *N = dyn_cast_or_null<MDNode>(*SGI); - if (!N) continue; - DIGlobalVariable GV(N); - if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { - DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); - SPDie->AddChild(ScopedGVDie); - } + // Translate tag to proper Dwarf tag. The result variable is dropped for + // now. + unsigned Tag; + switch (VD.getTag()) { + case dwarf::DW_TAG_return_variable: + return NULL; + case dwarf::DW_TAG_arg_variable: + Tag = dwarf::DW_TAG_formal_parameter; + break; + case dwarf::DW_TAG_auto_variable: // fall thru + default: + Tag = dwarf::DW_TAG_variable; + break; } -} -/// ConstructDefaultDbgScope - Construct a default scope for the subprogram. -/// -void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { - StringMap<DIE*> &Globals = ModuleCU->getGlobals(); - StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName()); - if (GI != Globals.end()) { - DIE *SPDie = GI->second; + // Define variable debug information entry. + DIE *VariableDie = new DIE(Tag); - // Add the function bounds. - AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - DWLabel("func_end", SubprogramCount)); - MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); + DIE *AbsDIE = NULL; + if (DbgVariable *AV = DV->getAbstractVariable()) + AbsDIE = AV->getDIE(); + + if (AbsDIE) { + DIScope DS(Scope->getScopeNode()); + DISubprogram InlinedSP = getDISubprogram(DS.getNode()); + DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + (void) OriginSPDIE; + assert (OriginSPDIE && "Unable to find Origin DIE for the SP!"); + DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); + assert (AbsDIE && "Unable to find Origin DIE for the Variable!"); + AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); } + else { + AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + AddSourceLine(VariableDie, &VD); + + // Add variable type. + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (VD.isBlockByrefVariable()) + AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + else + AddType(Unit, VariableDie, VD.getType()); + } + + // Add variable address. + if (!Scope->isAbstractScope()) { + MachineLocation Location; + Location.set(RI->getFrameRegister(*MF), + RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); + } + DV->setDIE(VariableDie); + return VariableDie; + +} +DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) { + if (!Scope) + return NULL; + DIScope DS(Scope->getScopeNode()); + if (DS.isNull()) + return NULL; + + DIE *ScopeDIE = NULL; + if (Scope->getInlinedAt()) + ScopeDIE = ConstructInlinedScopeDIE(Scope); + else if (DS.isSubprogram()) { + if (Scope->isAbstractScope()) + ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode()); + else + ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode()); + } + else { + ScopeDIE = ConstructLexicalScopeDIE(Scope); + if (!ScopeDIE) return NULL; + } + + // Add variables to scope. + SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU); + if (VariableDIE) + ScopeDIE->AddChild(VariableDIE); + } + + // Add nested scopes. + SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { + // Define the Scope debug information entry. + DIE *NestedDIE = ConstructScopeDIE(Scopes[j]); + if (NestedDIE) + ScopeDIE->AddChild(NestedDIE); + } + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and @@ -1680,6 +1757,9 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (TimePassesIsEnabled) DebugTimer->startTimer(); + if (!MAI->doesSupportDebugInformation()) + return; + DebugInfoFinder DbgFinder; DbgFinder.processModule(*M); @@ -1710,7 +1790,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { ConstructGlobalVariableDIE(*I); } - // Create DIEs for each of the externally visible subprograms. + // Create DIEs for each subprogram. for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), E = DbgFinder.subprogram_end(); I != E; ++I) ConstructSubprogram(*I); @@ -1754,6 +1834,13 @@ void DwarfDebug::EndModule() { if (TimePassesIsEnabled) DebugTimer->startTimer(); + // Attach DW_AT_inline attribute with inlined subprogram DIEs. + for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), + AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { + DIE *ISP = *AI; + AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } + // Standard sections final addresses. Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); @@ -1811,55 +1898,102 @@ void DwarfDebug::EndModule() { DebugTimer->stopTimer(); } +/// findAbstractVariable - Find abstract variable, if any, associated with Var. +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx, + DILocation &ScopeLoc) { + + DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode()); + if (AbsDbgVariable) + return AbsDbgVariable; + + DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode()); + if (!Scope) + return NULL; + + AbsDbgVariable = new DbgVariable(Var, FrameIdx); + Scope->AddVariable(AbsDbgVariable); + AbstractVariables[Var.getNode()] = AbsDbgVariable; + return AbsDbgVariable; +} + /// CollectVariableInfo - Populate DbgScope entries with variables' info. void DwarfDebug::CollectVariableInfo() { if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { MetadataBase *MB = VI->first; MDNode *Var = dyn_cast_or_null<MDNode>(MB); + if (!Var) continue; DIVariable DV (Var); - if (DV.isNull()) continue; - unsigned VSlot = VI->second; - DbgScope *Scope = NULL; - ValueMap<MDNode *, DbgScope *>::iterator DSI = - DbgScopeMap.find(DV.getContext().getNode()); - if (DSI != DbgScopeMap.end()) - Scope = DSI->second; - else - // There is not any instruction assocated with this scope, so get - // a new scope. - Scope = getDbgScope(DV.getContext().getNode(), - NULL /* Not an instruction */, - NULL /* Not inlined */); - assert (Scope && "Unable to find variable scope!"); - Scope->AddVariable(new DbgVariable(DV, VSlot, false)); - } -} - -/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that -/// start with this machine instruction. -void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + std::pair< unsigned, MDNode *> VP = VI->second; + DILocation ScopeLoc(VP.second); + + DbgScope *Scope = + ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode()); + if (!Scope) + Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + DbgVariable *RegVar = new DbgVariable(DV, VP.first); + Scope->AddVariable(RegVar); + if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc)) + RegVar->setAbstractVariable(AbsDbgVariable); + } +} + +/// BeginScope - Process beginning of a scope starting at Label. +void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) { InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); if (I == DbgScopeBeginMap.end()) return; - SmallVector<DbgScope *, 2> &SD = I->second; - for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); + ScopeVector &SD = DbgScopeBeginMap[MI]; + for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); SDI != SDE; ++SDI) (*SDI)->setStartLabelID(Label); } -/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that -/// end with this machine instruction. -void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { +/// EndScope - Process end of a scope. +void DwarfDebug::EndScope(const MachineInstr *MI) { InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); if (I == DbgScopeEndMap.end()) return; + + unsigned Label = MMI->NextLabelID(); + Asm->printLabel(Label); + SmallVector<DbgScope *, 2> &SD = I->second; for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); SDI != SDE; ++SDI) (*SDI)->setEndLabelID(Label); + return; +} + +/// createDbgScope - Create DbgScope for the scope. +void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { + + if (!InlinedAt) { + DbgScope *WScope = DbgScopeMap.lookup(Scope); + if (WScope) + return; + WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); + DbgScopeMap.insert(std::make_pair(Scope, WScope)); + if (DIDescriptor(Scope).isLexicalBlock()) + createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); + return; + } + + DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); + if (WScope) + return; + + WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt); + DbgScopeMap.insert(std::make_pair(InlinedAt, WScope)); + DILocation DL(InlinedAt); + createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); } /// ExtractScopeInformation - Scan machine instructions in this function @@ -1870,26 +2004,41 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (!DbgScopeMap.empty()) return false; - // Scan each instruction and create scopes. + // Scan each instruction and create scopes. First build working set of scopes. for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) { for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; DebugLoc DL = MInsn->getDebugLoc(); - if (DL.isUnknown()) - continue; + if (DL.isUnknown()) continue; DebugLocTuple DLT = MF->getDebugLocTuple(DL); - if (!DLT.Scope) - continue; + if (!DLT.Scope) continue; // There is no need to create another DIE for compile unit. For all // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. - DIDescriptor D(DLT.Scope); - if (!D.isCompileUnit()) { - DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc); - Scope->setLastInsn(MInsn); - } + if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; + createDbgScope(DLT.Scope, DLT.InlinedAtLoc); + } + } + + + // Build scope hierarchy using working set of scopes. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) continue; + DebugLocTuple DLT = MF->getDebugLocTuple(DL); + if (!DLT.Scope) continue; + // There is no need to create another DIE for compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. + if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; + DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc); + Scope->setLastInsn(MInsn); } } @@ -1897,8 +2046,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { // last instruction as this scope's last instrunction. for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), DE = DbgScopeMap.end(); DI != DE; ++DI) { - DbgScope *S = DI->second; - if (!S) continue; + if (DI->second->isAbstractScope()) + continue; assert (DI->second->getFirstInsn() && "Invalid first instruction!"); DI->second->FixInstructionMarkers(); assert (DI->second->getLastInsn() && "Invalid last instruction!"); @@ -1911,7 +2060,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(), DE = DbgScopeMap.end(); DI != DE; ++DI) { DbgScope *S = DI->second; - if (!S) continue; + if (S->isAbstractScope()) + continue; const MachineInstr *MI = S->getFirstInsn(); assert (MI && "DbgScope does not have first instruction!"); @@ -1919,8 +2069,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (IDI != DbgScopeBeginMap.end()) IDI->second.push_back(S); else - DbgScopeBeginMap.insert(std::make_pair(MI, - SmallVector<DbgScope *, 2>(2, S))); + DbgScopeBeginMap[MI].push_back(S); MI = S->getLastInsn(); assert (MI && "DbgScope does not have last instruction!"); @@ -1928,31 +2077,12 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (IDI != DbgScopeEndMap.end()) IDI->second.push_back(S); else - DbgScopeEndMap.insert(std::make_pair(MI, - SmallVector<DbgScope *, 2>(2, S))); + DbgScopeEndMap[MI].push_back(S); } return !DbgScopeMap.empty(); } -static DISubprogram getDISubprogram(MDNode *N) { - - DIDescriptor D(N); - if (D.isNull()) - return DISubprogram(); - - if (D.isCompileUnit()) - return DISubprogram(); - - if (D.isSubprogram()) - return DISubprogram(N); - - if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(N).getContext().getNode()); - - llvm_unreachable("Unexpected Descriptor!"); -} - /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::BeginFunction(MachineFunction *MF) { @@ -1963,11 +2093,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN if (!ExtractScopeInformation(MF)) return; CollectVariableInfo(); -#endif // Begin accumulating function debug information. MMI->BeginFunction(MF); @@ -1977,7 +2105,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { // Emit label for the implicitly defined dbg.stoppoint at the start of the // function. -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN DebugLoc FDL = MF->getDefaultDebugLoc(); if (!FDL.isUnknown()) { DebugLocTuple DLT = MF->getDebugLocTuple(FDL); @@ -1990,15 +2117,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { Asm->printLabel(LabelID); O << '\n'; } -#else - DebugLoc FDL = MF->getDefaultDebugLoc(); - if (!FDL.isUnknown()) { - DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); - Asm->printLabel(LabelID); - O << '\n'; - } -#endif if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -2011,10 +2129,9 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN if (DbgScopeMap.empty()) return; -#endif + // Define end label for subprogram. EmitLabel("func_end", SubprogramCount); @@ -2029,41 +2146,24 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { Lines.begin(), Lines.end()); } - // Construct the DbgScope for abstract instances. - for (SmallVector<DbgScope *, 32>::iterator - I = AbstractInstanceRootList.begin(), - E = AbstractInstanceRootList.end(); I != E; ++I) - ConstructFunctionDbgScope(*I); + // Construct abstract scopes. + for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), + AE = AbstractScopesList.end(); AI != AE; ++AI) + ConstructScopeDIE(*AI); - // Construct scopes for subprogram. - if (FunctionDbgScope) - ConstructFunctionDbgScope(FunctionDbgScope); - else - // FIXME: This is wrong. We are essentially getting past a problem with - // debug information not being able to handle unreachable blocks that have - // debug information in them. In particular, those unreachable blocks that - // have "region end" info in them. That situation results in the "root - // scope" not being created. If that's the case, then emit a "default" - // scope, i.e., one that encompasses the whole function. This isn't - // desirable. And a better way of handling this (and all of the debugging - // information) needs to be explored. - ConstructDefaultDbgScope(MF); + ConstructScopeDIE(CurrentFnDbgScope); DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, MMI->getFrameMoves())); // Clear debug info - if (FunctionDbgScope) { - delete FunctionDbgScope; + if (CurrentFnDbgScope) { + CurrentFnDbgScope = NULL; DbgScopeMap.clear(); DbgScopeBeginMap.clear(); DbgScopeEndMap.clear(); - DbgAbstractScopeMap.clear(); - DbgConcreteScopeMap.clear(); - FunctionDbgScope = NULL; - LexicalScopeStack.clear(); - AbstractInstanceRootList.clear(); - AbstractInstanceRootMap.clear(); + ConcreteScopes.clear(); + AbstractScopesList.clear(); } Lines.clear(); @@ -2130,201 +2230,6 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, return SrcId; } -/// RecordRegionStart - Indicate the start of a region. -unsigned DwarfDebug::RecordRegionStart(MDNode *N) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DbgScope *Scope = getOrCreateScope(N); - unsigned ID = MMI->NextLabelID(); - if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); - LexicalScopeStack.push_back(Scope); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfDebug::RecordRegionEnd(MDNode *N) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DbgScope *Scope = getOrCreateScope(N); - unsigned ID = MMI->NextLabelID(); - Scope->setEndLabelID(ID); - // FIXME : region.end() may not be in the last basic block. - // For now, do not pop last lexical scope because next basic - // block may start new inlined function's body. - unsigned LSSize = LexicalScopeStack.size(); - if (LSSize != 0 && LSSize != 1) - LexicalScopeStack.pop_back(); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordVariable - Indicate the declaration of a local variable. -void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DIDescriptor Desc(N); - DbgScope *Scope = NULL; - bool InlinedFnVar = false; - - if (Desc.getTag() == dwarf::DW_TAG_variable) - Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode()); - else { - bool InlinedVar = false; - MDNode *Context = DIVariable(N).getContext().getNode(); - DISubprogram SP(Context); - if (!SP.isNull()) { - // SP is inserted into DbgAbstractScopeMap when inlined function - // start was recorded by RecordInlineFnStart. - ValueMap<MDNode *, DbgScope *>::iterator - I = DbgAbstractScopeMap.find(SP.getNode()); - if (I != DbgAbstractScopeMap.end()) { - InlinedVar = true; - Scope = I->second; - } - } - if (!InlinedVar) - Scope = getOrCreateScope(Context); - } - - assert(Scope && "Unable to find the variable's scope"); - DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar); - Scope->AddVariable(DV); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); -} - -//// RecordInlinedFnStart - Indicate the start of inlined subroutine. -unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, - unsigned Line, unsigned Col) { - unsigned LabelID = MMI->NextLabelID(); - - if (!MAI->doesDwarfUsesInlineInfoSection()) - return LabelID; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - MDNode *Node = SP.getNode(); - DenseMap<const MDNode *, DbgScope *>::iterator - II = AbstractInstanceRootMap.find(Node); - - if (II == AbstractInstanceRootMap.end()) { - // Create an abstract instance entry for this inlined function if it doesn't - // already exist. - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node)); - - // Get the compile unit context. - DIE *SPDie = ModuleCU->getDieMapSlotFor(Node); - if (!SPDie) - SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); - - // Mark as being inlined. This makes this subprogram entry an abstract - // instance root. - // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only - // that it's defined. That probably won't change in the future. However, - // this could be more elegant. - AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); - - // Keep track of the abstract scope for this function. - DbgAbstractScopeMap[Node] = Scope; - - AbstractInstanceRootMap[Node] = Scope; - AbstractInstanceRootList.push_back(Scope); - } - - // Create a concrete inlined instance for this inlined function. - DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node)); - DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); - ScopeDie->setAbstractCompileUnit(ModuleCU); - - DIE *Origin = ModuleCU->getDieMapSlotFor(Node); - AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, Origin); - AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); - AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line); - AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col); - - ConcreteScope->setDie(ScopeDie); - ConcreteScope->setStartLabelID(LabelID); - MMI->RecordUsedDbgLabel(LabelID); - - LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); - - // Keep track of the concrete scope that's inlined into this function. - ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator - SI = DbgConcreteScopeMap.find(Node); - - if (SI == DbgConcreteScopeMap.end()) - DbgConcreteScopeMap[Node].push_back(ConcreteScope); - else - SI->second.push_back(ConcreteScope); - - // Track the start label for this inlined function. - ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.find(Node); - - if (I == InlineInfo.end()) - InlineInfo[Node].push_back(LabelID); - else - I->second.push_back(LabelID); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return LabelID; -} - -/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. -unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { - if (!MAI->doesDwarfUsesInlineInfoSection()) - return 0; - - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - MDNode *Node = SP.getNode(); - ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator - I = DbgConcreteScopeMap.find(Node); - - if (I == DbgConcreteScopeMap.end()) { - // FIXME: Can this situation actually happen? And if so, should it? - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return 0; - } - - SmallVector<DbgScope *, 8> &Scopes = I->second; - if (Scopes.empty()) { - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics - // or "beginning of inlined scope" is not recoginized due to - // missing location info. In such cases, ignore this region.end. - return 0; - } - - DbgScope *Scope = Scopes.back(); Scopes.pop_back(); - unsigned ID = MMI->NextLabelID(); - MMI->RecordUsedDbgLabel(ID); - Scope->setEndLabelID(ID); - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -2470,10 +2375,7 @@ void DwarfDebug::EmitDIE(DIE *Die) { case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); - unsigned Addr = - CompileUnitOffsets[Die->getAbstractCompileUnit()] + - Origin->getOffset(); - + unsigned Addr = Origin->getOffset(); Asm->EmitInt32(Addr); break; } @@ -3002,10 +2904,14 @@ void DwarfDebug::EmitDebugInlineInfo() { Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator - I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { - MDNode *Node = I->first; - SmallVector<unsigned, 4> &Labels = I->second; + for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(), + E = InlinedSPNodes.end(); I != E; ++I) { + +// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator + // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { + MDNode *Node = *I; + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node); + SmallVector<InlineInfoLabels, 4> &Labels = II->second; DISubprogram SP(Node); const char *LName = SP.getLinkageName(); const char *Name = SP.getName(); @@ -3019,17 +2925,21 @@ void DwarfDebug::EmitDebugInlineInfo() { // __asm__ attribute. if (LName[0] == 1) LName = &LName[1]; - Asm->EmitString(LName); +// Asm->EmitString(LName); + EmitSectionOffset("string", "section_str", + StringPool.idFor(LName), false, true); + } Asm->EOL("MIPS linkage name"); - - Asm->EmitString(Name); Asm->EOL("Function name"); - +// Asm->EmitString(Name); + EmitSectionOffset("string", "section_str", + StringPool.idFor(Name), false, true); + Asm->EOL("Function name"); Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count"); - for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(), + for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = ModuleCU->getDieMapSlotFor(Node); + DIE *SP = LI->second; Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) @@ -3037,7 +2947,7 @@ void DwarfDebug::EmitDebugInlineInfo() { else O << MAI->getData64bitsDirective(); - PrintLabelName("label", *LI); Asm->EOL("low_pc"); + PrintLabelName("label", LI->first); Asm->EOL("low_pc"); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ddb0a15ed78d..646de8f36e14 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -30,9 +30,9 @@ namespace llvm { class CompileUnit; -class DbgVariable; -class DbgScope; class DbgConcreteScope; +class DbgScope; +class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; class MCAsmInfo; @@ -41,7 +41,7 @@ class Timer; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. /// -class VISIBILITY_HIDDEN SrcLineInfo { +class SrcLineInfo { unsigned Line; // Source line number. unsigned Column; // Source column. unsigned SourceID; // Source ID number. @@ -57,7 +57,7 @@ public: unsigned getLabelID() const { return LabelID; } }; -class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { +class DwarfDebug : public Dwarf { //===--------------------------------------------------------------------===// // Attributes used to construct specific Dwarf sections. // @@ -134,52 +134,52 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// bool shouldEmit; - // FunctionDbgScope - Top level scope for the current function. + // CurrentFnDbgScope - Top level scope for the current function. // - DbgScope *FunctionDbgScope; + DbgScope *CurrentFnDbgScope; /// DbgScopeMap - Tracks the scopes in the current function. + /// ValueMap<MDNode *, DbgScope *> DbgScopeMap; + /// ConcreteScopes - Tracks the concrete scopees in the current function. + /// These scopes are also included in DbgScopeMap. + ValueMap<MDNode *, DbgScope *> ConcreteScopes; + + /// AbstractScopes - Tracks the abstract scopes a module. These scopes are + /// not included DbgScopeMap. + ValueMap<MDNode *, DbgScope *> AbstractScopes; + SmallVector<DbgScope *, 4>AbstractScopesList; + + /// AbstractVariables - Collection on abstract variables. + ValueMap<MDNode *, DbgVariable *> AbstractVariables; + + /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked + /// (at the end of the module) as DW_AT_inline. + SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + + /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. + SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; + /// ScopedGVs - Tracks global variables that are not at file scope. /// For example void f() { static int b = 42; } SmallVector<WeakVH, 4> ScopedGVs; - typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> > + typedef SmallVector<DbgScope *, 2> ScopeVector; + typedef DenseMap<const MachineInstr *, ScopeVector> InsnToDbgScopeMapTy; - /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts. + /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts. InsnToDbgScopeMapTy DbgScopeBeginMap; /// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends. InsnToDbgScopeMapTy DbgScopeEndMap; - /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current - /// function. - ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap; - - /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current - /// function. - ValueMap<MDNode *, - SmallVector<DbgScope *, 8> > DbgConcreteScopeMap; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. - ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo; - - /// AbstractInstanceRootMap - Map of abstract instance roots of inlined - /// functions. These are subroutine entries that contain a DW_AT_inline - /// attribute. - DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap; - - /// AbstractInstanceRootList - List of abstract instance roots of inlined - /// functions. These are subroutine entries that contain a DW_AT_inline - /// attribute. - SmallVector<DbgScope *, 32> AbstractInstanceRootList; - - /// LexicalScopeStack - A stack of lexical scopes. The top one is the current - /// scope. - SmallVector<DbgScope *, 16> LexicalScopeStack; + typedef std::pair<unsigned, DIE *> InlineInfoLabels; + ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo; + SmallVector<MDNode *, 4> InlinedSPNodes; /// CompileUnitOffsets - A vector of the offsets of the compile units. This is /// used when calculating the "origin" of a concrete instance of an inlined @@ -361,10 +361,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); - /// getDbgScope - Returns the scope associated with the given descriptor. - /// - DbgScope *getOrCreateScope(MDNode *N); - DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt); + /// getUpdatedDbgScope - Find or create DbgScope assicated with + /// the instruction. Initialize scope and update scope hierarchy. + DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt); + + /// createDbgScope - Create DbgScope for the scope. + void createDbgScope(MDNode *Scope, MDNode *InlinedAt); + + DbgScope *getOrCreateAbstractScope(MDNode *N); + + /// findAbstractVariable - Find abstract variable associated with Var. + DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, + DILocation &Loc); + + DIE *UpdateSubprogramScopeDIE(MDNode *SPNode); + DIE *ConstructLexicalScopeDIE(DbgScope *Scope); + DIE *ConstructScopeDIE(DbgScope *Scope); + DIE *ConstructInlinedScopeDIE(DbgScope *Scope); + DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); /// ConstructDbgScope - Construct the components of a scope. /// @@ -372,15 +386,6 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { unsigned ParentStartID, unsigned ParentEndID, DIE *ParentDie, CompileUnit *Unit); - /// ConstructFunctionDbgScope - Construct the scope for the subprogram. - /// - void ConstructFunctionDbgScope(DbgScope *RootScope, - bool AbstractScope = false); - - /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. - /// - void ConstructDefaultDbgScope(MachineFunction *MF); - /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc /// tools to recognize the object file contains Dwarf information. void EmitInitial(); @@ -535,22 +540,6 @@ public: unsigned getOrCreateSourceID(const std::string &DirName, const std::string &FileName); - /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(MDNode *N); - - /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(MDNode *N); - - /// RecordVariable - Indicate the declaration of a local variable. - void RecordVariable(MDNode *N, unsigned FrameIndex); - - //// RecordInlinedFnStart - Indicate the start of inlined subroutine. - unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, - unsigned Line, unsigned Col); - - /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. - unsigned RecordInlinedFnEnd(DISubprogram &SP); - /// ExtractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. bool ExtractScopeInformation(MachineFunction *MF); @@ -558,15 +547,16 @@ public: /// CollectVariableInfo - Populate DbgScope entries with variables' info. void CollectVariableInfo(); - /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that - /// start with this machine instruction. - void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); - /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that /// end with this machine instruction. void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); -}; + /// BeginScope - Process beginning of a scope starting at Label. + void BeginScope(const MachineInstr *MI, unsigned Label); + + /// EndScope - Prcess end of a scope. + void EndScope(const MachineInstr *MI); +}; } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 6c03b559b77b..1c8b8f464720 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -74,6 +74,25 @@ unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { return 0; } +/// CreateLabelDiff - Emit a label and subtract it from the expression we +/// already have. This is equivalent to emitting "foo - .", but we have to emit +/// the label for "." directly. +const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef, + const char *LabelName, + unsigned Index) { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << LabelName << Asm->getFunctionNumber() + << "_" << Index; + MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + Asm->OutStreamer.EmitLabel(DotSym); + + return MCBinaryExpr::CreateSub(ExprRef, + MCSymbolRefExpr::Create(DotSym, + Asm->OutContext), + Asm->OutContext); +} + /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that /// is shared among many Frame Description Entries. There is at least one CIE /// in every non-empty .debug_frame section. @@ -176,24 +195,10 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // If there is a personality, we need to indicate the function's location. if (PersonalityRef) { - // If the reference to the personality function symbol is not already - // pc-relative, then we need to subtract our current address from it. Do - // this by emitting a label and subtracting it from the expression we - // already have. This is equivalent to emitting "foo - .", but we have to - // emit the label for "." directly. - if (!IsPersonalityPCRel) { - SmallString<64> Name; - raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() - << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index; - MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); - Asm->OutStreamer.EmitLabel(DotSym); - - PersonalityRef = - MCBinaryExpr::CreateSub(PersonalityRef, - MCSymbolRefExpr::Create(DotSym,Asm->OutContext), - Asm->OutContext); - } - + if (!IsPersonalityPCRel) + PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", + Index); + O << MAI->getData32bitsDirective(); PersonalityRef->print(O, MAI); Asm->EOL("Personality"); @@ -232,11 +237,16 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // corresponding function is static, this should not be externally visible. if (!TheFunc->hasLocalLinkage()) if (const char *GlobalEHDirective = MAI->getGlobalEHDirective()) - O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; + O << GlobalEHDirective << EHFrameInfo.FnName << '\n'; // If corresponding function is weak definition, this should be too. if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective()) - O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << '\n'; + + // If corresponding function is hidden, this should be too. + if (TheFunc->hasHiddenVisibility()) + if (const char *HiddenDirective = MAI->getHiddenDirective()) + O << HiddenDirective << EHFrameInfo.FnName << '\n' ; // If there are no calls then you can't unwind. This may mean we can omit the // EH Frame, but some environments do not handle weak absolute symbols. If @@ -457,6 +467,39 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, return SizeActions; } +/// CallToNoUnwindFunction - Return `true' if this is a call to a function +/// marked `nounwind'. Return `false' otherwise. +bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { + assert(MI->getDesc().isCall() && "This should be a call instruction!"); + + bool MarkedNoUnwind = false; + bool SawFunc = false; + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + + if (MO.isGlobal()) { + if (Function *F = dyn_cast<Function>(MO.getGlobal())) { + if (SawFunc) { + // Be conservative. If we have more than one function operand for this + // call, then we can't make the assumption that it's the callee and + // not a parameter to the call. + // + // FIXME: Determine if there's a way to say that `F' is the callee or + // parameter. + MarkedNoUnwind = false; + break; + } + + MarkedNoUnwind = F->doesNotThrow(); + SawFunc = true; + } + } + } + + return MarkedNoUnwind; +} + /// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke /// has a try-range containing the call, a non-zero landing pad, and an /// appropriate action. The entry for an ordinary call has a try-range @@ -485,7 +528,9 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); MI != E; ++MI) { if (!MI->isLabel()) { - SawPotentiallyThrowing |= MI->getDesc().isCall(); + if (MI->getDesc().isCall()) + SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI); + continue; } @@ -497,7 +542,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, SawPotentiallyThrowing = false; // Beginning of a new try-range? - RangeMapType::iterator L = PadMap.find(BeginLabel); + RangeMapType::const_iterator L = PadMap.find(BeginLabel); if (L == PadMap.end()) // Nope, it was just some random label. continue; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f6f50255f2e7..aff1665e9b97 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -25,13 +25,14 @@ namespace llvm { struct LandingPadInfo; class MachineModuleInfo; class MCAsmInfo; +class MCExpr; class Timer; class raw_ostream; //===----------------------------------------------------------------------===// /// DwarfException - Emits Dwarf exception handling directives. /// -class VISIBILITY_HIDDEN DwarfException : public Dwarf { +class DwarfException : public Dwarf { struct FunctionEHFrameInfo { std::string FnName; unsigned Number; @@ -155,6 +156,10 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions); + /// CallToNoUnwindFunction - Return `true' if this is a call to a function + /// marked `nounwind'. Return `false' otherwise. + bool CallToNoUnwindFunction(const MachineInstr *MI); + /// ComputeCallSiteTable - Compute the call-site table. The entry for an /// invoke has a try-range containing the call, a non-zero landing pad and an /// appropriate action. The entry for an ordinary call has a try-range @@ -168,6 +173,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { const SmallVectorImpl<unsigned> &FirstActions); void EmitExceptionTable(); + /// CreateLabelDiff - Emit a label and subtract it from the expression we + /// already have. This is equivalent to emitting "foo - .", but we have to + /// emit the label for "." directly. + const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName, + unsigned Index); public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 33ebb3bd0eb5..dedd695392e3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -29,7 +29,7 @@ namespace llvm { class TargetData; class TargetRegisterInfo; - class VISIBILITY_HIDDEN Dwarf { + class Dwarf { protected: //===-------------------------------------------------------------==---===// // Core attributes used by the DWARF printer. diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 0638d3568549..63ae65368058 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -81,47 +81,20 @@ unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, return DD->RecordSourceLine(Line, Col, Scope); } -/// RecordRegionStart - Indicate the start of a region. -unsigned DwarfWriter::RecordRegionStart(MDNode *N) { - return DD->RecordRegionStart(N); -} - -/// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfWriter::RecordRegionEnd(MDNode *N) { - return DD->RecordRegionEnd(N); -} - /// getRecordSourceLineCount - Count source lines. unsigned DwarfWriter::getRecordSourceLineCount() { return DD->getRecordSourceLineCount(); } -/// RecordVariable - Indicate the declaration of a local variable. -/// -void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) { - DD->RecordVariable(N, FrameIndex); -} - /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should /// be emitted. bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -//// RecordInlinedFnStart -unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, - unsigned Line, unsigned Col) { - return DD->RecordInlinedFnStart(SP, CU, Line, Col); -} - -/// RecordInlinedFnEnd - Indicate the end of inlined subroutine. -unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { - return DD->RecordInlinedFnEnd(SP); -} - -void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { - DD->SetDbgScopeEndLabels(MI, L); +void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) { + DD->BeginScope(MI, L); } -void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { - DD->SetDbgScopeBeginLabels(MI, L); +void DwarfWriter::EndScope(const MachineInstr *MI) { + DD->EndScope(MI); } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index baea9642d4fd..94bfb7204ba2 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> @@ -40,18 +41,38 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); -static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); // Throttle for huge numbers of predecessors (compile speed problems) static cl::opt<unsigned> -TailMergeThreshold("tail-merge-threshold", +TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden); +// Heuristic for tail merging (and, inversely, tail duplication). +// TODO: This should be replaced with a target query. +static cl::opt<unsigned> +TailMergeSize("tail-merge-size", + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); + +namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass, + public BranchFolder { + public: + static char ID; + explicit BranchFolderPass(bool defaultEnableTailMerge) + : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + }; +} char BranchFolderPass::ID = 0; -FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { +FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) { return new BranchFolderPass(DefaultEnableTailMerge); } @@ -63,7 +84,6 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } - BranchFolder::BranchFolder(bool defaultEnableTailMerge) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; @@ -77,12 +97,12 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge) { void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(errs() << "\nRemoving MBB: " << *MBB); - + MachineFunction *MF = MBB->getParent(); // drop all successors. while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_end()-1); - + // If there are any labels in the basic block, unregister them from // MachineModuleInfo. if (MMI && !MBB->empty()) { @@ -93,7 +113,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { MMI->InvalidateLabel(I->getOperand(0).getImm()); } } - + // Remove the block. MF->erase(MBB); } @@ -182,6 +202,11 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } + // Do tail duplication once after tail merging is done. Otherwise it is + // tough to avoid situations where tail duplication and tail merging undo + // each other's transformations ad infinitum. + MadeChange |= TailDuplicateBlocks(MF); + // See if any jump tables have become mergable or dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); @@ -190,7 +215,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Figure out how these jump tables should be merged. std::vector<unsigned> JTMapping; JTMapping.reserve(JTs.size()); - + // We always keep the 0th jump table. JTMapping.push_back(0); @@ -202,7 +227,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, else JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); } - + // If a jump table was merge with another one, walk the function rewriting // references to jump tables to reference the new JT ID's. Keep track of // whether we see a jump table idx, if not, we can delete the JT. @@ -221,7 +246,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, JTIsLive.set(NewIdx); } } - + // Finally, remove dead jump tables. This happens either because the // indirect jump was unreachable (and thus deleted) or because the jump // table was merged with some other one. @@ -245,7 +270,7 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { unsigned Hash = MI->getOpcode(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); - + // Merge in bits from the operand if easy. unsigned OperandHash = 0; switch (Op.getType()) { @@ -267,31 +292,30 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { break; default: break; } - + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); } return Hash; } /// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks -/// with no successors, we hash two instructions, because cross-jumping -/// only saves code when at least two instructions are removed (since a +/// with no successors, we hash two instructions, because cross-jumping +/// only saves code when at least two instructions are removed (since a /// branch must be inserted). For blocks with a successor, one of the /// two blocks to be tail-merged will end with a branch already, so /// it gains to cross-jump even for one instruction. - static unsigned HashEndOfMBB(const MachineBasicBlock *MBB, unsigned minCommonTailLength) { MachineBasicBlock::const_iterator I = MBB->end(); if (I == MBB->begin()) return 0; // Empty MBB. - + --I; unsigned Hash = HashMachineInstr(I); - + if (I == MBB->begin() || minCommonTailLength == 1) return Hash; // Single instr MBB. - + --I; // Hash in the second-to-last instruction. Hash ^= HashMachineInstr(I) << 2; @@ -307,11 +331,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock::iterator &I2) { I1 = MBB1->end(); I2 = MBB2->end(); - + unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; - if (!I1->isIdenticalTo(I2) || + if (!I1->isIdenticalTo(I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler @@ -332,11 +356,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { MachineBasicBlock *OldBB = OldInst->getParent(); - + // Remove all the old successors of OldBB from the CFG. while (!OldBB->succ_empty()) OldBB->removeSuccessor(OldBB->succ_begin()); - + // Remove all the dead instructions from the end of OldBB. OldBB->erase(OldInst, OldBB->end()); @@ -361,10 +385,10 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Move all the successors of this block to the specified block. NewMBB->transferSuccessors(&CurMBB); - + // Add an edge from CurMBB to NewMBB for the fall-through. CurMBB.addSuccessor(NewMBB); - + // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); @@ -375,7 +399,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, RS->forward(prior(CurMBB.end())); BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++) + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) if (RegsLiveAtExit[i]) NewMBB->addLiveIn(i); } @@ -404,8 +428,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, // branches temporarily for tail merging). In the case where CurMBB ends // with a conditional branch to the next block, optimize by reversing the // test and conditionally branching to SuccMBB instead. - -static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, +static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); @@ -425,24 +448,43 @@ static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB, TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>()); } -static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p, - const std::pair<unsigned,MachineBasicBlock*> &q) { - if (p.first < q.first) - return true; - else if (p.first > q.first) - return false; - else if (p.second->getNumber() < q.second->getNumber()) - return true; - else if (p.second->getNumber() > q.second->getNumber()) - return false; - else { - // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing - // an object with itself. +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + else if (getHash() > o.getHash()) + return false; + else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. #ifndef _GLIBCXX_DEBUG - llvm_unreachable("Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #endif - return false; + return false; + } +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. +static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; } + --I; + if (!I->getDesc().isTerminator()) break; + ++NumTerms; + } + return NumTerms; } /// ProfitableToMerge - Check if two machine basic blocks have a common tail @@ -454,21 +496,52 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, unsigned minCommonTailLength, unsigned &CommonTailLen, MachineBasicBlock::iterator &I1, - MachineBasicBlock::iterator &I2) { + MachineBasicBlock::iterator &I2, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); MachineFunction *MF = MBB1->getParent(); - if (CommonTailLen >= minCommonTailLength) - return true; - if (CommonTailLen == 0) return false; - // If we are optimizing for code size, 1 instruction in common is enough if - // we don't have to split a block. At worst we will be replacing a - // fallthrough into the common tail with a branch, which at worst breaks - // even with falling through into the duplicated common tail. - if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. + if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().getDesc().isBarrier() && + !MBB2->back().getDesc().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + if (EffectiveTailLen >= 2 && + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; @@ -476,40 +549,44 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, } /// ComputeSameTails - Look through all the blocks in MergePotentials that have -/// hash CurHash (guaranteed to match the last element). Build the vector +/// hash CurHash (guaranteed to match the last element). Build the vector /// SameTails of all those that have the (same) largest number of instructions /// in common of any pair of these blocks. SameTails entries contain an -/// iterator into MergePotentials (from which the MachineBasicBlock can be -/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the /// instruction where the matching code sequence begins. /// Order of elements in SameTails is the reverse of the order in which /// those blocks appear in MergePotentials (where they are not necessarily /// consecutive). -unsigned BranchFolder::ComputeSameTails(unsigned CurHash, - unsigned minCommonTailLength) { +unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { unsigned maxCommonTailLength = 0U; SameTails.clear(); MachineBasicBlock::iterator TrialBBI1, TrialBBI2; MPIterator HighestMPIter = prior(MergePotentials.end()); for (MPIterator CurMPIter = prior(MergePotentials.end()), - B = MergePotentials.begin(); - CurMPIter!=B && CurMPIter->first==CurHash; + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { - for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) { + for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { unsigned CommonTailLen; - if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength, - CommonTailLen, TrialBBI1, TrialBBI2)) { + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; HighestMPIter = CurMPIter; - SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1)); + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); } if (HighestMPIter == CurMPIter && CommonTailLen == maxCommonTailLength) - SameTails.push_back(std::make_pair(I, TrialBBI2)); + SameTails.push_back(SameTailElt(I, TrialBBI2)); } - if (I==B) + if (I == B) break; } } @@ -518,21 +595,21 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, /// RemoveBlocksWithHash - Remove all blocks with hash CurHash from /// MergePotentials, restoring branches at ends of blocks as appropriate. -void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, - MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB) { +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { MPIterator CurMPIter, B; - for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); - CurMPIter->first==CurHash; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; --CurMPIter) { // Put the unconditional branch back, if we need one. - MachineBasicBlock *CurMBB = CurMPIter->second; + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); if (SuccBB && CurMBB != PredBB) FixTail(CurMBB, SuccBB, TII); - if (CurMPIter==B) + if (CurMPIter == B) break; } - if (CurMPIter->first!=CurHash) + if (CurMPIter->getHash() != CurHash) CurMPIter++; MergePotentials.erase(CurMPIter, MergePotentials.end()); } @@ -541,35 +618,37 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// only of the common tail. Create a block that does by splitting one. unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, unsigned maxCommonTailLength) { - unsigned i, commonTailIndex; + unsigned commonTailIndex = 0; unsigned TimeEstimate = ~0U; - for (i=0, commonTailIndex=0; i<SameTails.size(); i++) { + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. - if (SameTails[i].first->second==PredBB) { + if (SameTails[i].getBlock() == PredBB) { commonTailIndex = i; break; } // Otherwise, make a (fairly bogus) choice based on estimate of // how long it will take the various blocks to execute. - unsigned t = EstimateRuntime(SameTails[i].first->second->begin(), - SameTails[i].second); - if (t<=TimeEstimate) { + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { TimeEstimate = t; commonTailIndex = i; } } - MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; - MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size " + DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); - SameTails[commonTailIndex].first->second = newMBB; - SameTails[commonTailIndex].second = newMBB->begin(); + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + // If we split PredBB, newMBB is the new predecessor. - if (PredBB==MBB) + if (PredBB == MBB) PredBB = newMBB; return commonTailIndex; @@ -579,35 +658,49 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, // successor, or all have no successor) can be tail-merged. If there is a // successor, any blocks in MergePotentials that are not tail-merged and // are not immediately before Succ must have an unconditional branch to -// Succ added (but the predecessor/successor lists need no adjustment). +// Succ added (but the predecessor/successor lists need no adjustment). // The lone predecessor of Succ that falls through into Succ, // if any, is given in PredBB. -bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, - MachineBasicBlock* PredBB) { +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { bool MadeChange = false; - // It doesn't make sense to save a single instruction since tail merging - // will add a jump. - // FIXME: Ask the target to provide the threshold? - unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; - - DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n'); + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(errs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + errs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + errs() << "\n"; + if (SuccBB) { + errs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + errs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + errs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? "" : "s") << '\n'; + ); // Sort by hash value so that blocks with identical end sequences sort // together. - std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare); + std::stable_sort(MergePotentials.begin(), MergePotentials.end()); // Walk through equivalence sets looking for actual exact matches. while (MergePotentials.size() > 1) { - unsigned CurHash = prior(MergePotentials.end())->first; - + unsigned CurHash = MergePotentials.back().getHash(); + // Build SameTails, identifying the set of blocks with this hash code // and with the maximum number of instructions in common. - unsigned maxCommonTailLength = ComputeSameTails(CurHash, - minCommonTailLength); + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); - // If we didn't find any pair that has at least minCommonTailLength + // If we didn't find any pair that has at least minCommonTailLength // instructions in common, remove all blocks with this hash code and retry. if (SameTails.empty()) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); @@ -618,36 +711,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, // block, which we can't jump to), we can treat all blocks with this same // tail at once. Use PredBB if that is one of the possibilities, as that // will not introduce any extra branches. - MachineBasicBlock *EntryBB = MergePotentials.begin()->second-> - getParent()->begin(); - unsigned int commonTailIndex, i; - for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) { - MachineBasicBlock *MBB = SameTails[i].first->second; - if (MBB->begin() == SameTails[i].second && MBB != EntryBB) { - commonTailIndex = i; - if (MBB==PredBB) + MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> + getParent()->begin(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; } } - if (commonTailIndex==SameTails.size()) { + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); } - MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for "); - for (unsigned int i=0; i<SameTails.size(); ++i) { - if (commonTailIndex==i) + DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) continue; - DEBUG(errs() << SameTails[i].first->second->getNumber() << ","); + DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); // Hack the end off BB i, making it jump to BB commonTailIndex instead. - ReplaceTailWithBranchTo(SameTails[i].second, MBB); + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. - MergePotentials.erase(SameTails[i].first); + MergePotentials.erase(SameTails[i].getMPIter()); } DEBUG(errs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks @@ -660,26 +775,27 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return false; - + bool MadeChange = false; // First find blocks with no successors. MergePotentials.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->succ_empty()) - MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I)); } + // See if we can do any tail merging on those. if (MergePotentials.size() < TailMergeThreshold && MergePotentials.size() >= 2) - MadeChange |= TryMergeBlocks(NULL, NULL); + MadeChange |= TryTailMergeBlocks(NULL, NULL); // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by // (1) temporarily removing any unconditional branch from the predecessor // to IBB, and // (2) alter conditional branches so they branch to the other block - // not IBB; this may require adding back an unconditional branch to IBB + // not IBB; this may require adding back an unconditional branch to IBB // later, where there wasn't one coming in. E.g. // Bcc IBB // fallthrough to QBB @@ -693,18 +809,19 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), E2 = I->pred_end(); P != E2; ++P) { - MachineBasicBlock* PBB = *P; + MachineBasicBlock *PBB = *P; // Skip blocks that loop to themselves, can't tail merge these. - if (PBB==IBB) + if (PBB == IBB) continue; // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) @@ -715,7 +832,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Failing case: IBB is the target of a cbr, and // we cannot reverse the branch. SmallVector<MachineOperand, 4> NewCond(Cond); - if (!Cond.empty() && TBB==IBB) { + if (!Cond.empty() && TBB == IBB) { if (TII->ReverseBranchCondition(NewCond)) continue; // This is the QBB case described above @@ -727,20 +844,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // to have a bit in the edge so we didn't have to do all this. if (IBB->isLandingPad()) { MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock* PredNextBB = NULL; - if (IP!=MF.end()) + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) PredNextBB = IP; - if (TBB==NULL) { - if (IBB!=PredNextBB) // fallthrough + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough continue; } else if (FBB) { - if (TBB!=IBB && FBB!=IBB) // cbr then ubr + if (TBB != IBB && FBB != IBB) // cbr then ubr continue; } else if (Cond.empty()) { - if (TBB!=IBB) // ubr + if (TBB != IBB) // ubr continue; } else { - if (TBB!=IBB && IBB!=PredNextBB) // cbr + if (TBB != IBB && IBB != PredNextBB) // cbr continue; } } @@ -749,19 +866,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); } - MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U), + *P)); } } - if (MergePotentials.size() >= 2) - MadeChange |= TryMergeBlocks(I, PredBB); - // Reinsert an unconditional branch if needed. - // The 1 below can occur as a result of removing blocks in TryMergeBlocks. - PredBB = prior(I); // this may have been changed in TryMergeBlocks - if (MergePotentials.size()==1 && - MergePotentials.begin()->second != PredBB) - FixTail(MergePotentials.begin()->second, I, TII); + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. + PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); } } return MadeChange; @@ -773,14 +891,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { bool BranchFolder::OptimizeBranches(MachineFunction &MF) { bool MadeChange = false; - + // Make sure blocks are numbered in order MF.RenumberBlocks(); for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; MadeChange |= OptimizeBlock(MBB); - + // If it is dead, remove it. if (MBB->pred_empty()) { RemoveDeadBlock(MBB); @@ -801,7 +919,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { /// bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, - MachineBasicBlock *TBB, + MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond) { MachineFunction::iterator Fallthrough = CurBB; @@ -809,14 +927,22 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == CurBB->getParent()->end()) return false; - + // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. if (!CurBB->isSuccessor(Fallthrough)) return false; - - // If we couldn't analyze the branch, assume it could fall through. - if (BranchUnAnalyzable) return true; - + + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. + if (BranchUnAnalyzable) + return CurBB->empty() || !CurBB->back().getDesc().isBarrier() || + CurBB->back().getDesc().isPredicable(); + // If there is no branch, control always falls through. if (TBB == 0) return true; @@ -825,11 +951,11 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, if (MachineFunction::iterator(TBB) == Fallthrough || MachineFunction::iterator(FBB) == Fallthrough) return true; - - // If it's an unconditional branch to some block not the fall through, it + + // If it's an unconditional branch to some block not the fall through, it // doesn't fall through. if (Cond.empty()) return false; - + // Otherwise, if it is conditional and has no explicit false block, it falls // through. return FBB == 0; @@ -853,14 +979,14 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) { /// fall-through to MBB1 than to fall through into MBB2. This has to return /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will /// result in infinite loops. -static bool IsBetterFallthrough(MachineBasicBlock *MBB1, +static bool IsBetterFallthrough(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { // Right now, we use a simple heuristic. If MBB2 ends with a call, and // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to // optimize branches that branch to either a return block or an assert block // into a fallthrough to the return. if (MBB1->empty() || MBB2->empty()) return false; - + // If there is a clear successor ordering we make sure that one block // will fall through to the next if (MBB1->isSuccessor(MBB2)) return true; @@ -871,14 +997,153 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); } +/// TailDuplicateBlocks - Look for small blocks that are unconditionally +/// branched to and do not fall through. Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. +bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + // Only duplicate blocks that end with unconditional branches. + if (CanFallThrough(MBB)) + continue; + + MadeChange |= TailDuplicate(MBB, MF); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. +bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, + MachineFunction &MF) { + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + + // Set the limit on the number of instructions to duplicate, with a default + // of one less than the tail-merge threshold. When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. + unsigned MaxDuplicateCount = + MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ? + 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1); + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned i = 0; + bool HasCall = false; + for (MachineBasicBlock::iterator I = TailBB->begin(); + I != TailBB->end(); ++I, ++i) { + // Non-duplicable things shouldn't be tail-duplicated. + if (I->getDesc().isNotDuplicable()) return false; + // Don't duplicate more than the threshold. + if (i == MaxDuplicateCount) return false; + // Remember if we saw a call. + if (I->getDesc().isCall()) HasCall = true; + } + // Heuristically, don't tail-duplicate calls if it would expand code size, + // as it's less likely to be worth the extra cost. + if (i > 1 && HasCall) + return false; + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + if (PredBB->succ_size() > 1) continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB)) + continue; + + DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + // Clone the contents of TailBB into PredBB. + for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(I); + PredBB->insert(PredBB->end(), NewMI); + } + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector<MachineOperand, 4> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && + TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(errs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *TailBB); + PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(TailBB); + Changed = true; + } + + return Changed; +} + /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; + MachineFunction &MF = *MBB->getParent(); +ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; ++FallThrough; - + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be @@ -886,8 +1151,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; - - if (FallThrough == MBB->getParent()->end()) { + + if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! } else { // Rewrite all predecessors of the old block to go to the fallthrough @@ -898,8 +1163,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. - MBB->getParent()->getJumpTableInfo()-> - ReplaceMBBInJumpTables(MBB, FallThrough); + MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } return MadeChange; @@ -917,29 +1181,49 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, !PriorCond.empty()); - + // If the previous branch is conditional and both conditions go to the same // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { TII->RemoveBranch(PrevBB); - PriorCond.clear(); + PriorCond.clear(); if (PriorTBB != MBB) TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + + // If the previous block unconditionally falls through to this block and + // this block has no other predecessors, move the contents of this block + // into the prior block. This doesn't usually happen when SimplifyCFG + // has been used, but it can happen if tail merging splits a fall-through + // predecessor of a block. + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && + PrevBB.succ_size() == 1 && + !MBB->hasAddressTaken()) { + DEBUG(errs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); + PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(MBB); + MadeChange = true; + return MadeChange; + } + // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. if (PriorTBB == MBB && PriorFBB == 0) { TII->RemoveBranch(PrevBB); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. if (PriorFBB == MBB) { @@ -947,9 +1231,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } - + // If the prior block branches here on true and somewhere else on false, and // if the branch condition is reversible, reverse the branch to create a // fall-through. @@ -960,10 +1244,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - + // If this block has no successors (e.g. it is a return block or ends with // a call to a no-return function like abort or __cxa_throw) and if the pred // falls through into this block, and if it would otherwise fall through @@ -976,13 +1260,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineFunction::iterator(PriorTBB) == FallThrough && !CanFallThrough(MBB)) { bool DoTransform = true; - + // We have to be careful that the succs of PredBB aren't both no-successor // blocks. If neither have successors and if PredBB is the second from // last block in the function, we'd just keep swapping the two blocks for // last. Only do the swap if one is clearly better to fall through than // the other. - if (FallThrough == --MBB->getParent()->end() && + if (FallThrough == --MF.end() && !IsBetterFallthrough(PriorTBB, MBB)) DoTransform = false; @@ -1000,20 +1284,20 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (DoTransform && !MBB->succ_empty() && (!CanFallThrough(PriorTBB) || PriorTBB->empty())) DoTransform = false; - - + + if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { DEBUG(errs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); - + TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); // Move this block to the end of the function. - MBB->moveAfter(--MBB->getParent()->end()); + MBB->moveAfter(--MF.end()); MadeChange = true; ++NumBranchOpts; return MadeChange; @@ -1021,7 +1305,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } } - + // Analyze the branch in the current block. MachineBasicBlock *CurTBB = 0, *CurFBB = 0; SmallVector<MachineOperand, 4> CurCond; @@ -1030,7 +1314,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); - // If this is a two-way branch, and the FBB branches to this block, reverse + // If this is a two-way branch, and the FBB branches to this block, reverse // the condition so the single-basic-block loop is faster. Instead of: // Loop: xxx; jcc Out; jmp Loop // we want: @@ -1042,14 +1326,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); MadeChange = true; ++NumBranchOpts; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - - + // If this branch is the only thing in its block, see if we can forward // other blocks across it. - if (CurTBB && CurCond.empty() && CurFBB == 0 && + if (CurTBB && CurCond.empty() && CurFBB == 0 && MBB->begin()->getDesc().isBranch() && CurTBB != MBB && !MBB->hasAddressTaken()) { // This block may contain just an unconditional branch. Because there can @@ -1068,7 +1351,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { !PrevBB.isSuccessor(MBB)) { // If the prior block falls through into us, turn it into an // explicit branch to us to make updates simpler. - if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && PriorTBB != MBB && PriorFBB != MBB) { if (PriorTBB == 0) { assert(PriorCond.empty() && PriorFBB == 0 && @@ -1104,18 +1387,17 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { TII->RemoveBranch(*PMBB); - NewCurCond.clear(); + NewCurCond.clear(); TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false); + PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); } } } // Change any jumptables to go to the new MBB. - MBB->getParent()->getJumpTableInfo()-> - ReplaceMBBInJumpTables(MBB, CurTBB); + MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB); if (DidChange) { ++NumBranchOpts; MadeChange = true; @@ -1123,7 +1405,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } } } - + // Add the branch back if the block is more than just an uncond branch. TII->InsertBranch(*MBB, CurTBB, 0, CurCond); } @@ -1134,9 +1416,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // place to move this block where a fall-through will happen. if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, PriorTBB, PriorFBB, PriorCond)) { + // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. - bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, + bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, CurCond); if (!MBB->isLandingPad()) { @@ -1147,12 +1430,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - if (PredBB != MBB && !CanFallThrough(PredBB) + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (PredBB != MBB && !CanFallThrough(PredBB) && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { // If the current block doesn't fall through, just move it. // If the current block can fall through and does not end with a - // conditional branch, we need to append an unconditional jump to + // conditional branch, we need to append an unconditional jump to // the (current) next block. To avoid a possible compile-time // infinite loop, move blocks only backward in this case. // Also, if there are already 2 branches here, we cannot add a third; @@ -1167,11 +1453,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { } MBB->moveAfter(PredBB); MadeChange = true; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } } - + if (!CurFallsThru) { // Check all successors to see if we can move this block before it. for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), @@ -1179,26 +1465,29 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Analyze the branch at the end of the block before the succ. MachineBasicBlock *SuccBB = *SI; MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; - std::vector<MachineOperand> SuccPrevCond; - + // If this block doesn't already fall-through to that successor, and if // the succ doesn't already have a block that can fall through into it, // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen. - if (SuccBB != MBB && !CanFallThrough(SuccPrev) && + if (SuccBB != MBB && &*SuccPrev != MBB && + !CanFallThrough(SuccPrev) && !CurUnAnalyzable && !SuccBB->isLandingPad()) { MBB->moveBefore(SuccBB); MadeChange = true; - return OptimizeBlock(MBB); + goto ReoptimizeBlock; } } - + // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. - if (FallThrough != MBB->getParent()->end() && + MachineBasicBlock *PrevTBB, *PrevFBB; + SmallVector<MachineOperand, 4> PrevCond; + if (FallThrough != MF.end() && + !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(FallThrough)) { - MBB->moveAfter(--MBB->getParent()->end()); + MBB->moveAfter(--MF.end()); MadeChange = true; return MadeChange; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 9763e3339a20..4920755c227b 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -11,7 +11,6 @@ #define LLVM_CODEGEN_BRANCHFOLDING_HPP #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include <vector> namespace llvm { @@ -20,6 +19,7 @@ namespace llvm { class RegScavenger; class TargetInstrInfo; class TargetRegisterInfo; + template<typename T> class SmallVectorImpl; class BranchFolder { public: @@ -30,11 +30,58 @@ namespace llvm { const TargetRegisterInfo *tri, MachineModuleInfo *mmi); private: - typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt; + class MergePotentialsElt { + unsigned Hash; + MachineBasicBlock *Block; + public: + MergePotentialsElt(unsigned h, MachineBasicBlock *b) + : Hash(h), Block(b) {} + + unsigned getHash() const { return Hash; } + MachineBasicBlock *getBlock() const { return Block; } + + void setBlock(MachineBasicBlock *MBB) { + Block = MBB; + } + + bool operator<(const MergePotentialsElt &) const; + }; typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; - typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt; + class SameTailElt { + MPIterator MPIter; + MachineBasicBlock::iterator TailStartPos; + public: + SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) + : MPIter(mp), TailStartPos(tsp) {} + + MPIterator getMPIter() const { + return MPIter; + } + MergePotentialsElt &getMergePotentialsElt() const { + return *getMPIter(); + } + MachineBasicBlock::iterator getTailStartPos() const { + return TailStartPos; + } + unsigned getHash() const { + return getMergePotentialsElt().getHash(); + } + MachineBasicBlock *getBlock() const { + return getMergePotentialsElt().getBlock(); + } + bool tailIsWholeBlock() const { + return TailStartPos == getBlock()->begin(); + } + + void setBlock(MachineBasicBlock *MBB) { + getMergePotentialsElt().setBlock(MBB); + } + void setTailStartPos(MachineBasicBlock::iterator Pos) { + TailStartPos = Pos; + } + }; std::vector<SameTailElt> SameTails; bool EnableTailMerge; @@ -44,18 +91,23 @@ namespace llvm { RegScavenger *RS; bool TailMergeBlocks(MachineFunction &MF); - bool TryMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); + bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1); - unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, unsigned maxCommonTailLength); + bool TailDuplicateBlocks(MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); + bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); @@ -66,19 +118,6 @@ namespace llvm { MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond); }; - - - /// BranchFolderPass - Wrap branch folder in a machine function pass. - class BranchFolderPass : public MachineFunctionPass, - public BranchFolder { - public: - static char ID; - explicit BranchFolderPass(bool defaultEnableTailMerge) - : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } - }; } #endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 6fff12c0b0d5..e9844d84c17f 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -56,7 +56,6 @@ namespace { MachineFunction::iterator InsertPt, MachineFunction::iterator Begin, MachineFunction::iterator End); - void UpdateTerminator(MachineBasicBlock *MBB); bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, MachineLoop *L); bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, @@ -141,66 +140,9 @@ void CodePlacementOpt::Splice(MachineFunction &MF, MF.splice(InsertPt, Begin, End); - UpdateTerminator(prior(Begin)); - UpdateTerminator(OldBeginPrior); - UpdateTerminator(OldEndPrior); -} - -/// UpdateTerminator - Update the terminator instructions in MBB to account -/// for changes to the layout. If the block previously used a fallthrough, -/// it may now need a branch, and if it previously used branching it may now -/// be able to use a fallthrough. -/// -void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) { - // A block with no successors has no concerns with fall-through edges. - if (MBB->succ_empty()) return; - - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); - (void) B; - assert(!B && "UpdateTerminators requires analyzable predecessors!"); - if (Cond.empty()) { - if (TBB) { - // The block has an unconditional branch. If its successor is now - // its layout successor, delete the branch. - if (MBB->isLayoutSuccessor(TBB)) - TII->RemoveBranch(*MBB); - } else { - // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *MBB->succ_begin(); - if (!MBB->isLayoutSuccessor(TBB)) - TII->InsertBranch(*MBB, TBB, 0, Cond); - } - } else { - if (FBB) { - // The block has a non-fallthrough conditional branch. If one of its - // successors is its layout successor, rewrite it to a fallthrough - // conditional branch. - if (MBB->isLayoutSuccessor(TBB)) { - TII->RemoveBranch(*MBB); - TII->ReverseBranchCondition(Cond); - TII->InsertBranch(*MBB, FBB, 0, Cond); - } else if (MBB->isLayoutSuccessor(FBB)) { - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, TBB, 0, Cond); - } - } else { - // The block has a fallthrough conditional branch. - MachineBasicBlock *MBBA = *MBB->succ_begin(); - MachineBasicBlock *MBBB = *next(MBB->succ_begin()); - if (MBBA == TBB) std::swap(MBBB, MBBA); - if (MBB->isLayoutSuccessor(TBB)) { - TII->RemoveBranch(*MBB); - TII->ReverseBranchCondition(Cond); - TII->InsertBranch(*MBB, MBBA, 0, Cond); - } else if (!MBB->isLayoutSuccessor(MBBA)) { - TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, TBB, MBBA, Cond); - } - } - } + prior(Begin)->updateTerminator(); + OldBeginPrior->updateTerminator(); + OldEndPrior->updateTerminator(); } /// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3e3b28a8109b..8a3bd0bf4e00 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -515,6 +515,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + break; } assert(CI->use_empty() && diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 794ecf7bd193..23dce4a91a13 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -55,7 +55,10 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit &Pred = *I->getSUnit(); if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the @@ -75,7 +78,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { unsigned NumNodesBlocking = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + if (getSingleUnscheduledPred(I->getSUnit()) == SU) ++NumNodesBlocking; } @@ -92,7 +98,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 2a93a35b3faf..a60d34f58a91 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -53,7 +53,8 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false)); +static cl::opt<bool> EarlyCoalescing("early-coalescing", + cl::init(false), cl::Hidden); static cl::opt<int> CoalescingLimit("early-coalescing-limit", cl::init(-1), cl::Hidden); @@ -646,17 +647,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, 0, false, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); - + interval.addRange(LR); LR.valno->addKill(end); DEBUG(errs() << " +" << LR << '\n'); } -bool -LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, - SmallVector<MachineInstr*,16> &IdentCopies, - SmallVector<MachineInstr*,16> &OtherCopies) { - bool HaveConflict = false; +bool LiveIntervals:: +isSafeAndProfitableToCoalesce(LiveInterval &DstInt, + LiveInterval &SrcInt, + SmallVector<MachineInstr*,16> &IdentCopies, + SmallVector<MachineInstr*,16> &OtherCopies) { unsigned NumIdent = 0; for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), re = mri_->def_end(); ri != re; ++ri) { @@ -665,16 +666,16 @@ LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) return false; if (SrcReg != DstInt.reg) { + // Non-identity copy - we cannot handle overlapping intervals + if (DstInt.liveAt(getInstructionIndex(MI))) + return false; OtherCopies.push_back(MI); - HaveConflict |= DstInt.liveAt(getInstructionIndex(MI)); } else { IdentCopies.push_back(MI); ++NumIdent; } } - if (!HaveConflict) - return false; // Let coalescer handle it return IdentCopies.size() > OtherCopies.size(); } @@ -701,19 +702,21 @@ void LiveIntervals::performEarlyCoalescing() { LiveInterval &SrcInt = getInterval(PHISrc); SmallVector<MachineInstr*, 16> IdentCopies; SmallVector<MachineInstr*, 16> OtherCopies; - if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies)) + if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt, + IdentCopies, OtherCopies)) continue; DEBUG(errs() << "PHI Join: " << *Join); assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); + assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 && + "PHI join src should not be used elsewhere"); VNInfo *VNI = DstInt.getValNumInfo(0); // Change the non-identity copies to directly target the phi destination. for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { MachineInstr *PHICopy = OtherCopies[i]; - DEBUG(errs() << "Moving: " << *PHICopy); - SlotIndex MIIndex = getInstructionIndex(PHICopy); + DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy); SlotIndex DefIndex = MIIndex.getDefIndex(); LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); SlotIndex StartIndex = SLR->start; @@ -724,8 +727,7 @@ void LiveIntervals::performEarlyCoalescing() { SrcInt.removeValNo(SLR->valno); DEBUG(errs() << " added range [" << StartIndex << ',' << EndIndex << "] to reg" << DstInt.reg << '\n'); - if (DstInt.liveAt(StartIndex)) - DstInt.removeRange(StartIndex, EndIndex); + assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!"); VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, VNInfoAllocator); NewVNI->setHasPHIKill(true); diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 96c655c1a9b8..16a79bb54e97 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -50,6 +50,14 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +MachineInstr * +LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + if (Kills[i]->getParent() == MBB) + return Kills[i]; + return NULL; +} + void LiveVariables::VarInfo::dump() const { errs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), @@ -222,8 +230,9 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, /// implicit defs to a machine instruction if there was an earlier def of its /// super-register. void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { + MachineInstr *LastDef = PhysRegDef[Reg]; // If there was a previous use or a "full" def all is well. - if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) { + if (!LastDef && !PhysRegUse[Reg]) { // Otherwise, the last sub-register def implicitly defines this register. // e.g. // AH = @@ -257,6 +266,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { } } } + else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) + // Last def defines the super register, add an implicit def of reg. + LastDef->addOperand(MachineOperand::CreateReg(Reg, + true/*IsDef*/, true/*IsImp*/)); // Remember this use. PhysRegUse[Reg] = MI; @@ -641,3 +655,36 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] .push_back(BBI->getOperand(i).getReg()); } + +/// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All +/// variables that are live out of DomBB will be marked as passing live through +/// BB. +void LiveVariables::addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB) { + const unsigned NumNew = BB->getNumber(); + const unsigned NumDom = DomBB->getNumber(); + + // Update info for all live variables + for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, + E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) { + VarInfo &VI = getVarInfo(Reg); + + // Anything live through DomBB is also live through BB. + if (VI.AliveBlocks.test(NumDom)) { + VI.AliveBlocks.set(NumNew); + continue; + } + + // Variables not defined in DomBB cannot be live out. + const MachineInstr *Def = MRI->getVRegDef(Reg); + if (!Def || Def->getParent() != DomBB) + continue; + + // Killed by DomBB? + if (VI.findKill(DomBB)) + continue; + + // This register is defined in DomBB and live out + VI.AliveBlocks.set(NumNew); + } +} diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 7fbdb128fd40..cd52825d21f1 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -17,6 +17,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" @@ -242,6 +243,58 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { getParent()->splice(++BBI, this); } +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. + if (this->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (isLayoutSuccessor(TBB)) + TII->RemoveBranch(*this); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *succ_begin(); + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*this); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*this, FBB, 0, Cond); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, 0, Cond); + } + } else { + // The block has a fallthrough conditional branch. + MachineBasicBlock *MBBA = *succ_begin(); + MachineBasicBlock *MBBB = *next(succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (isLayoutSuccessor(TBB)) { + TII->RemoveBranch(*this); + TII->ReverseBranchCondition(Cond); + TII->InsertBranch(*this, MBBA, 0, Cond); + } else if (!isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, MBBA, Cond); + } + } + } +} void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { Successors.push_back(succ); @@ -371,10 +424,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock::succ_iterator SI = succ_begin(); MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB; while (SI != succ_end()) { - if (*SI == DestA && DestA == DestB) { - DestA = DestB = 0; - ++SI; - } else if (*SI == DestA) { + if (*SI == DestA) { DestA = 0; ++SI; } else if (*SI == DestB) { @@ -397,3 +447,8 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } return MadeChange; } + +void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, + bool t) { + OS << "BB#" << MBB->getNumber(); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5a1d9e687903..81d1301336b8 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -441,9 +441,10 @@ DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { + bool Immutable, bool isSS) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable)); + Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable, + isSS)); return -++NumFixedObjects; } @@ -529,10 +530,6 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const { unsigned MachineJumpTableInfo::getJumpTableIndex( const std::vector<MachineBasicBlock*> &DestBBs) { assert(!DestBBs.empty() && "Cannot create an empty jump table!"); - for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) - if (JumpTables[i].MBBs == DestBBs) - return i; - JumpTables.push_back(MachineJumpTableEntry(DestBBs)); return JumpTables.size()-1; } @@ -544,14 +541,25 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Not making a change?"); bool MadeChange = false; - for (size_t i = 0, e = JumpTables.size(); i != e; ++i) { - MachineJumpTableEntry &JTE = JumpTables[i]; - for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) - if (JTE.MBBs[j] == Old) { - JTE.MBBs[j] = New; - MadeChange = true; - } - } + for (size_t i = 0, e = JumpTables.size(); i != e; ++i) + ReplaceMBBInJumpTable(i, Old, New); + return MadeChange; +} + +/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update +/// the jump table to branch to New instead. +bool +MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, + MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Not making a change?"); + bool MadeChange = false; + MachineJumpTableEntry &JTE = JumpTables[Idx]; + for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) + if (JTE.MBBs[j] == Old) { + JTE.MBBs[j] = New; + MadeChange = true; + } return MadeChange; } diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 56294d90398f..f5febc5a4ca4 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -24,7 +24,7 @@ X("Machine Function Analysis", "machine-function-analysis", char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm, +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 5744c8a54552..b250faa62ae6 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -189,19 +189,19 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { /// print - Print the specified machine operand. /// void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { + // If the instruction is embedded into a basic block, we can find the + // target info for the instruction. + if (!TM) + if (const MachineInstr *MI = getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + TM = &MF->getTarget(); + switch (getType()) { case MachineOperand::MO_Register: if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) { OS << "%reg" << getReg(); } else { - // If the instruction is embedded into a basic block, we can find the - // target info for the instruction. - if (TM == 0) - if (const MachineInstr *MI = getParent()) - if (const MachineBasicBlock *MBB = MI->getParent()) - if (const MachineFunction *MF = MBB->getParent()) - TM = &MF->getTarget(); - if (TM) OS << "%" << TM->getRegisterInfo()->get(getReg()).Name; else @@ -265,7 +265,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "<jt#" << getIndex() << '>'; break; case MachineOperand::MO_GlobalAddress: - OS << "<ga:" << ((Value*)getGlobal())->getName(); + OS << "<ga:"; + WriteAsOperand(OS, getGlobal(), /*PrintType=*/false); if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; @@ -375,7 +376,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -395,7 +396,8 @@ void MachineInstr::addImplicitDefUseOperands() { /// TargetInstrDesc or the numOperands if it is not zero. (for /// instructions with variable number of operands). MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -413,7 +415,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -433,7 +435,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// basic block. /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -453,7 +456,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -472,7 +475,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), + : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -1060,9 +1063,16 @@ void MachineInstr::dump() const { } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { - unsigned StartOp = 0, e = getNumOperands(); + // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. + const MachineFunction *MF = 0; + if (const MachineBasicBlock *MBB = getParent()) { + MF = MBB->getParent(); + if (!TM && MF) + TM = &MF->getTarget(); + } // Print explicitly defined operands on the left of an assignment syntax. + unsigned StartOp = 0, e = getNumOperands(); for (; StartOp < e && getOperand(StartOp).isReg() && getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit(); @@ -1078,11 +1088,45 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << getDesc().getName(); // Print the rest of the operands. + bool OmittedAnyCallClobbers = false; + bool FirstOp = true; for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { - if (i != StartOp) - OS << ","; + const MachineOperand &MO = getOperand(i); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. + if (MF && getDesc().isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + bool HasAliasLive = false; + for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + unsigned AliasReg = *Alias; ++Alias) + if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + HasAliasLive = true; + break; + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; OS << " "; - getOperand(i).print(OS, TM); + MO.print(OS, TM); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (FirstOp) FirstOp = false; else OS << ","; + OS << " ..."; } bool HaveSemi = false; @@ -1098,12 +1142,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } } - if (!debugLoc.isUnknown()) { + if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; HaveSemi = true; // TODO: print InlinedAtLoc information - const MachineFunction *MF = getParent()->getParent(); DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); DICompileUnit CU(DLT.Scope); if (!CU.isNull()) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index de3ab273b39a..33b6b823446e 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -43,6 +44,7 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); namespace { class MachineLICM : public MachineFunctionPass { + MachineConstantPool *MCP; const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -111,6 +113,11 @@ namespace { /// be hoistable. MachineInstr *ExtractHoistableLoad(MachineInstr *MI); + /// LookForDuplicate - Find an instruction amount PrevMIs that is a + /// duplicate of MI. Return this instruction if it's found. + const MachineInstr *LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs); + /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on /// the preheader that compute the same value. If it's found, do a RAU on /// with the definition of the existing instruction rather than hoisting @@ -153,6 +160,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "******** Machine LICM ********\n"); Changed = FirstInLoop = false; + MCP = MF.getConstantPool(); TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); @@ -234,9 +242,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. if (!I.isInvariantLoad(AA)) - // FIXME: we should be able to sink loads with no other side effects if - // there is nothing that can change memory from here until the end of - // block. This is a trivial form of alias analysis. + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. return false; } @@ -432,32 +440,12 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } -static const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector<const MachineInstr*> &PrevMIs, - MachineRegisterInfo *RegInfo) { - unsigned NumOps = MI->getNumOperands(); +const MachineInstr* +MachineLICM::LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - unsigned NumOps2 = PrevMI->getNumOperands(); - if (NumOps != NumOps2) - continue; - bool IsSame = true; - for (unsigned j = 0; j != NumOps; ++j) { - const MachineOperand &MO = MI->getOperand(j); - if (MO.isReg() && MO.isDef()) { - if (RegInfo->getRegClass(MO.getReg()) != - RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) { - IsSame = false; - break; - } - continue; - } - if (!MO.isIdenticalTo(PrevMI->getOperand(j))) { - IsSame = false; - break; - } - } - if (IsSame) + if (TII->isIdentical(MI, PrevMI, RegInfo)) return PrevMI; } return 0; @@ -465,18 +453,19 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI, bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { - if (CI != CSEMap.end()) { - if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) { - DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) - RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - } - MI->eraseFromParent(); - ++NumCSEed; - return true; + if (CI == CSEMap.end()) + return false; + + if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + DEBUG(errs() << "CSEing " << *MI << " with " << *Dup); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); } + MI->eraseFromParent(); + ++NumCSEed; + return true; } return false; } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index b62803f105e4..4b067a0aa98b 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -76,9 +76,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN VariableDbgInfo.clear(); -#endif } /// AnalyzeModule - Scan the module for global debug information. diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 99812e0aa8a2..be9f68f6a725 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -175,6 +175,10 @@ FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { return new MachineVerifier(allowPhysDoubleDefs); } +void MachineFunction::verify() const { + MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this)); +} + bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { @@ -287,7 +291,18 @@ void MachineVerifier::visitMachineFunctionBefore() { markReachable(&MF->front()); } -void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { +// Does iterator point to a and b as the first two elements? +bool matchPair(MachineBasicBlock::const_succ_iterator i, + const MachineBasicBlock *a, const MachineBasicBlock *b) { + if (*i == a) + return *++i == b; + if (*i == b) + return *++i == a; + return false; +} + +void +MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); // Start with minimal CFG sanity checks. @@ -379,8 +394,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) } if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); - } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) || - (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) { + } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } @@ -400,8 +414,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); - } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) || - (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) { + } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { report("MBB exits via conditional branch/branch but the CFG " "successors don't match the actual successors!", MBB); } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 8071b0a81a89..cd38dd144c58 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -15,24 +15,32 @@ #define DEBUG_TYPE "phielim" #include "PHIElimination.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Function.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include <algorithm> #include <map> using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumSplits, "Number of critical edges split on demand"); + +static cl::opt<bool> +SplitEdges("split-phi-edges", + cl::desc("Split critical edges during phi elimination"), + cl::init(false), cl::Hidden); char PHIElimination::ID = 0; static RegisterPass<PHIElimination> @@ -40,11 +48,26 @@ X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); const PassInfo *const llvm::PHIEliminationID = &X; +namespace llvm { FunctionPass *createLocalRegisterAllocator(); } + +// Should we run edge splitting? +static bool shouldSplitEdges() { + // Edge splitting breaks the local register allocator. It cannot tolerate + // LiveVariables being run. + if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator) + return false; + return SplitEdges; +} + void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addPreserved<LiveVariables>(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); + AU.addPreserved<MachineDominatorTree>(); + if (shouldSplitEdges()) { + AU.addRequired<LiveVariables>(); + } else { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + } MachineFunctionPass::getAnalysisUsage(AU); } @@ -53,10 +76,16 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { PHIDefs.clear(); PHIKills.clear(); - analyzePHINodes(Fn); - bool Changed = false; + // Split critical edges to help the coalescer + if (shouldSplitEdges()) + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= SplitPHIEdges(Fn, *I); + + // Populate VRegPHIUseCount + analyzePHINodes(Fn); + // Eliminate PHI instructions by inserting copies into predecessor blocks. for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) Changed |= EliminatePHINodes(Fn, *I); @@ -75,7 +104,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { return Changed; } - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// @@ -107,26 +135,28 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, return true; } -// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg. -// This needs to be after any def or uses of SrcReg, but before any subsequent -// point where control flow might jump out of the basic block. +// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg +// when following the CFG edge to SuccMBB. This needs to be after any def of +// SrcReg, but before any subsequent point where control flow might jump out of +// the basic block. MachineBasicBlock::iterator llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB, unsigned SrcReg) { // Handle the trivial case trivially. if (MBB.empty()) return MBB.begin(); - // If this basic block does not contain an invoke, then control flow always - // reaches the end of it, so place the copy there. The logic below works in - // this case too, but is more expensive. - if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator())) + // Usually, we just want to insert the copy before the first terminator + // instruction. However, for the edge going to a landing pad, we must insert + // the copy before the call/invoke instruction. + if (!SuccMBB.isLandingPad()) return MBB.getFirstTerminator(); - // Discover any definition/uses in this basic block. + // Discover any defs/uses in this basic block. SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ++RI) { + RE = MRI->reg_end(); RI != RE; ++RI) { MachineInstr *DefUseMI = &*RI; if (DefUseMI->getParent() == &MBB) DefUsesInMBB.insert(DefUseMI); @@ -134,14 +164,14 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPoint; if (DefUsesInMBB.empty()) { - // No def/uses. Insert the copy at the start of the basic block. + // No defs. Insert the copy at the start of the basic block. InsertPoint = MBB.begin(); } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the definition/use. + // Insert the copy immediately after the def/use. InsertPoint = *DefUsesInMBB.begin(); ++InsertPoint; } else { - // Insert the copy immediately after the last definition/use. + // Insert the copy immediately after the last def/use. InsertPoint = MBB.end(); while (!DefUsesInMBB.count(&*--InsertPoint)) {} ++InsertPoint; @@ -155,7 +185,7 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. -/// +/// void llvm::PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { @@ -186,7 +216,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( } // Record PHI def. - assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); + assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); PHIDefs[DestReg] = &MBB; // Update live variable information if there is any. @@ -250,92 +280,35 @@ void llvm::PHIElimination::LowerAtomicPHINode( // basic block. if (!MBBsInsertedInto.insert(&opBlock)) continue; // If the copy has already been emitted, we're done. - + // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). - MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg); + MachineBasicBlock::iterator InsertPos = + FindCopyInsertPoint(opBlock, MBB, SrcReg); // Insert the copy. TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; - + // We want to be able to insert a kill of the register if this PHI (aka, the // copy we just inserted) is the last use of the source value. Live // variable analysis conservatively handles this by saying that the value is // live until the end of the block the PHI entry lives in. If the value // really is dead at the PHI copy, there will be no successor blocks which // have the value live-in. - // - // Check to see if the copy is the last use, and if so, update the live - // variables information so that it knows the copy source instruction kills - // the incoming value. - LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg); - - // Loop over all of the successors of the basic block, checking to see if - // the value is either live in the block, or if it is killed in the block. + // Also check to see if this register is in use by another PHI node which // has not yet been eliminated. If so, it will be killed at an appropriate // point later. // Is it used by any PHI instructions in this block? - bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; - - std::vector<MachineBasicBlock*> OpSuccBlocks; - - // Otherwise, scan successors, including the BB the PHI node lives in. - for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), - E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - - // Is it alive in this successor? - unsigned SuccIdx = SuccMBB->getNumber(); - if (InRegVI.AliveBlocks.test(SuccIdx)) { - ValueIsLive = true; - break; - } - - OpSuccBlocks.push_back(SuccMBB); - } - - // Check to see if this value is live because there is a use in a successor - // that kills it. - if (!ValueIsLive) { - switch (OpSuccBlocks.size()) { - case 1: { - MachineBasicBlock *MBB = OpSuccBlocks[0]; - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (InRegVI.Kills[i]->getParent() == MBB) { - ValueIsLive = true; - break; - } - break; - } - case 2: { - MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1]; - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (InRegVI.Kills[i]->getParent() == MBB1 || - InRegVI.Kills[i]->getParent() == MBB2) { - ValueIsLive = true; - break; - } - break; - } - default: - std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); - for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i) - if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), - InRegVI.Kills[i]->getParent())) { - ValueIsLive = true; - break; - } - } - } + bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0; // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsLive) { + if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, the first // terminator instruction at the end of the block may also use the value. @@ -346,7 +319,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( if (Term != opBlock.end()) { if (Term->readsRegister(SrcReg)) KillInst = Term; - + // Check that no other terminators use values. #ifndef NDEBUG for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end(); @@ -357,16 +330,16 @@ void llvm::PHIElimination::LowerAtomicPHINode( } #endif } - + // Finally, mark it killed. LV->addVirtualRegisterKilled(SrcReg, KillInst); // This vreg no longer lives all of the way through opBlock. unsigned opBlockNum = opBlock.getNumber(); - InRegVI.AliveBlocks.reset(opBlockNum); + LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } } - + // Really delete the PHI instruction now! MF.DeleteMachineInstr(MPhi); ++NumAtomic; @@ -386,3 +359,134 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(), BBI->getOperand(i).getReg())]; } + +bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) + return false; // Quick exit for basic blocks without PHIs. + LiveVariables &LV = getAnalysis<LiveVariables>(); + for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); + // We break edges when registers are live out from the predecessor block + // (not considering PHI nodes). If the register is live in to this block + // anyway, we would gain nothing from splitting. + if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV)) + SplitCriticalEdge(PreMBB, &MBB); + } + } + return true; +} + +bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV) { + LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. + std::vector<MachineBasicBlock*> OpSuccBlocks; + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + E = MBB.succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (VI.AliveBlocks.test(SuccIdx)) + return true; + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB) + return true; + break; + } + case 2: { + MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB1 || + VI.Kills[i]->getParent() == SuccMBB2) + return true; + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + VI.Kills[i]->getParent())) + return true; + } + return false; +} + +bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV) { + LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); + + if (VI.AliveBlocks.test(MBB.getNumber())) + return true; + + // defined in MBB? + const MachineInstr *Def = MRI->getVRegDef(Reg); + if (Def && Def->getParent() == &MBB) + return false; + + // killed in MBB? + return VI.findKill(&MBB); +} + +MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, + MachineBasicBlock *B) { + assert(A && B && "Missing MBB end point"); + + MachineFunction *MF = A->getParent(); + + // We may need to update A's terminator, but we can't do that if AnalyzeBranch + // fails. If A uses a jump table, we won't touch it. + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*A, TBB, FBB, Cond)) + return NULL; + + ++NumSplits; + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->push_back(NMBB); + DEBUG(errs() << "PHIElimination splitting critical edge:" + " BB#" << A->getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << B->getNumber() << '\n'); + + A->ReplaceUsesOfBlockWith(B, NMBB); + // If A may fall through to B, we may have to insert a branch. + if (A->isLayoutSuccessor(B)) + A->updateTerminator(); + + // Insert unconditional "jump B" instruction in NMBB. + NMBB->addSuccessor(B); + Cond.clear(); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); + + // Fix PHI nodes in B so they refer to NMBB instead of A + for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); + i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i) + for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) + if (i->getOperand(ni+1).getMBB() == A) + i->getOperand(ni+1).setMBB(NMBB); + + if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>()) + LV->addNewBlock(NMBB, A); + + if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>()) + MDT->addNewBlock(NMBB, A); + + return NMBB; +} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 3d02dfdcddba..94716eef6c51 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -89,11 +89,33 @@ namespace llvm { /// void analyzePHINodes(const MachineFunction& Fn); - // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - // SrcReg. This needs to be after any def or uses of SrcReg, but before - // any subsequent point where control flow might jump out of the basic - // block. + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB); + + /// isLiveOut - Determine if Reg is live out from MBB, when not + /// considering PHI nodes. This means that Reg is either killed by + /// a successor block or passed through one. + bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV); + + /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI + /// source registers. This means that Reg is either killed by MBB or passes + /// through it. + bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, + LiveVariables &LV); + + /// SplitCriticalEdge - Split a critical edge from A to B by + /// inserting a new MBB. Update branches in A and PHI instructions + /// in B. Return the new block. + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A, + MachineBasicBlock *B); + + /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, + MachineBasicBlock &SuccMBB, unsigned SrcReg); // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 3ed61a267f77..5f1f1f3580c1 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -216,13 +216,14 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; + SmallVector<TargetRegisterClass*, 4> CriticalPathRCs; if (EnablePostRAScheduler.getPosition() > 0) { if (!EnablePostRAScheduler) return false; } else { // Check that post-RA scheduling is enabled for this target. const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); - if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode)) + if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; } @@ -243,7 +244,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) : + (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); @@ -602,7 +603,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; ReleaseSucc(SU, &*I, IgnoreAntiDep); } } @@ -657,7 +660,7 @@ void SchedulePostRATDList::ListScheduleTopDown( available = true; for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(), E = SUnits[i].Preds.end(); I != E; ++I) { - if (I->getKind() != SDep::Anti) { + if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) { available = false; } else { SUnits[i].NumPredsLeft -= 1; @@ -736,7 +739,9 @@ void SchedulePostRATDList::ListScheduleTopDown( AntiDepBreaker::AntiDepRegVector AntiDepRegs; for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(), E = FoundSUnit->Preds.end(); I != E; ++I) { - if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled) + if (((I->getKind() == SDep::Anti) || + (I->getKind() == SDep::Output)) && + !I->getSUnit()->isScheduled) AntiDepRegs.push_back(I->getReg()); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index cce5ae817a20..8f623452e27a 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -39,8 +39,10 @@ using namespace llvm; static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden); -static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden); +static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), + cl::Hidden); +static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), + cl::Hidden); STATISTIC(NumSplits, "Number of intervals split"); STATISTIC(NumRemats, "Number of intervals split by rematerialization"); @@ -131,17 +133,14 @@ namespace { private: - MachineBasicBlock::iterator - findNextEmptySlot(MachineBasicBlock*, MachineInstr*, - SlotIndex&); MachineBasicBlock::iterator findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*, - SmallPtrSet<MachineInstr*, 4>&, SlotIndex&); + SmallPtrSet<MachineInstr*, 4>&); MachineBasicBlock::iterator findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex, - SmallPtrSet<MachineInstr*, 4>&, SlotIndex&); + SmallPtrSet<MachineInstr*, 4>&); int CreateSpillStackSlot(unsigned, const TargetRegisterClass *); @@ -161,7 +160,6 @@ namespace { bool Rematerialize(unsigned vreg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - SlotIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB); MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC, MachineInstr* DefMI, @@ -208,24 +206,6 @@ X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting"); const PassInfo *const llvm::PreAllocSplittingID = &X; - -/// findNextEmptySlot - Find a gap after the given machine instruction in the -/// instruction index map. If there isn't one, return end(). -MachineBasicBlock::iterator -PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, - SlotIndex &SpotIndex) { - MachineBasicBlock::iterator MII = MI; - if (++MII != MBB->end()) { - SlotIndex Index = - LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII)); - if (Index != SlotIndex()) { - SpotIndex = Index; - return MII; - } - } - return MBB->end(); -} - /// findSpillPoint - Find a gap as far away from the given MI that's suitable /// for spilling the current live interval. The index must be before any /// defs and uses of the live interval register in the mbb. Return begin() if @@ -233,8 +213,7 @@ PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI, MachineBasicBlock::iterator PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineInstr *DefMI, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - SlotIndex &SpillIndex) { + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { MachineBasicBlock::iterator Pt = MBB->begin(); MachineBasicBlock::iterator MII = MI; @@ -247,8 +226,6 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; while (MII != EndPt && !RefsInMBB.count(MII)) { - SlotIndex Index = LIs->getInstructionIndex(MII); - // We can't insert the spill between the barrier (a call), and its // corresponding call frame setup. if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) { @@ -259,9 +236,8 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, } } continue; - } else if (LIs->hasGapBeforeInstr(Index)) { + } else { Pt = MII; - SpillIndex = LIs->findGapBeforeInstr(Index, true); } if (RefsInMBB.count(MII)) @@ -281,8 +257,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI, MachineBasicBlock::iterator PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, SlotIndex LastIdx, - SmallPtrSet<MachineInstr*, 4> &RefsInMBB, - SlotIndex &RestoreIndex) { + SmallPtrSet<MachineInstr*, 4> &RefsInMBB) { // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb // begin index accordingly. MachineBasicBlock::iterator Pt = MBB->end(); @@ -306,7 +281,6 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, SlotIndex Index = LIs->getInstructionIndex(MII); if (Index > LastIdx) break; - SlotIndex Gap = LIs->findGapBeforeInstr(Index); // We can't insert a restore between the barrier (a call) and its // corresponding call frame teardown. @@ -315,9 +289,8 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI, if (MII == EndPt || RefsInMBB.count(MII)) return Pt; ++MII; } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode()); - } else if (Gap != SlotIndex()) { + } else { Pt = MII; - RestoreIndex = Gap; } if (RefsInMBB.count(MII)) @@ -339,7 +312,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (I != IntervalSSMap.end()) { SS = I->second; } else { - SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); IntervalSSMap[Reg] = SS; } @@ -364,10 +337,10 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB, if (!DefMBB) return false; - DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg); + DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg); if (I == IntervalSSMap.end()) return false; - DenseMap<SlotIndex, SlotIndex>::iterator + DenseMap<SlotIndex, SlotIndex>::const_iterator II = Def2SpillMap.find(DefIndex); if (II == Def2SpillMap.end()) return false; @@ -740,7 +713,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DefIdx = DefIdx.getDefIndex(); assert(DI->getOpcode() != TargetInstrInfo::PHI && - "Following NewVN isPHIDef flag incorrect. Fix me!"); + "PHI instr in code during pre-alloc splitting."); VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); // If the def is a move, set the copy field. @@ -896,25 +869,22 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - SlotIndex RestoreIdx, SmallPtrSet<MachineInstr*, 4>& RefsInMBB) { MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - SlotIndex KillIdx; if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) - KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx); + KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else - KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx); + KillPt = next(MachineBasicBlock::iterator(DefMI)); if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI); - LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI); + SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); ReconstructLiveInterval(CurrLI); - SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt)); RematIdx = RematIdx.getDefIndex(); RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); @@ -955,7 +925,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (I != IntervalSSMap.end()) { SS = I->second; } else { - SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); } MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), @@ -1086,17 +1056,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Find a point to restore the value after the barrier. - SlotIndex RestoreIndex; MachineBasicBlock::iterator RestorePt = - findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); + findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB); if (RestorePt == BarrierMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable restore point).\n"); return false; } if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI)) - if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, - RestoreIndex, RefsInMBB)) { + if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) { DEBUG(errs() << "success (remat).\n"); return true; } @@ -1114,7 +1082,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SpillIndex = LIs->getInstructionIndex(SpillMI); } else { MachineBasicBlock::iterator SpillPt = - findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex); + findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); if (SpillPt == BarrierMBB->begin()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. @@ -1124,10 +1092,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SS = CreateSpillStackSlot(CurrLI->reg, RC); TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC); SpillMI = prior(SpillPt); - LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def, - RestoreIndex, SpillIndex, SS)) { + LIs->getZeroIndex(), SpillIndex, SS)) { // If it's already split, just restore the value. There is no need to spill // the def again. if (!DefMI) { @@ -1144,13 +1112,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { if (DefMBB == BarrierMBB) { // Add spill after the def and the last use before the barrier. SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI, - RefsInMBB, SpillIndex); + RefsInMBB); if (SpillPt == DefMBB->begin()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. } } else { - SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex); + SpillPt = next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. @@ -1160,7 +1128,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SS = CreateSpillStackSlot(CurrLI->reg, RC); TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); SpillMI = prior(SpillPt); - LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); + SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI); } } @@ -1170,6 +1138,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add restore. bool FoldedRestore = false; + SlotIndex RestoreIndex; if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier, BarrierMBB, SS, RefsInMBB)) { RestorePt = LMI; @@ -1178,7 +1147,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } else { TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC); MachineInstr *LoadMI = prior(RestorePt); - LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex); + RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI); } // Update spill stack slot live interval. @@ -1398,7 +1367,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // Otherwise, this is a load-store case, so DCE them. for (SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end(); - UI != UI; ++UI) { + UI != UE; ++UI) { LIs->RemoveMachineInstrFromMaps(*UI); (*UI)->eraseFromParent(); } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 48567a0fc7f6..455964b5c5ad 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -77,6 +77,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { SmallVector<MachineInstr*, 8> ImpDefMIs; MachineBasicBlock *Entry = fn.begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; + SmallPtrSet<MachineInstr*, 8> ModInsts; for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); @@ -201,6 +202,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineOperand &RMO = UI.getOperand(); MachineInstr *RMI = &*UI; ++UI; + if (ModInsts.count(RMI)) + continue; MachineBasicBlock *RMBB = RMI->getParent(); if (RMBB == MBB) continue; @@ -209,9 +212,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && Reg == SrcReg) { + if (RMO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(RMI); + } RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j) RMI->RemoveOperand(j); + ModInsts.insert(RMI); continue; } @@ -222,6 +230,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { RMO.setIsKill(); } } + ModInsts.clear(); ImpDefRegs.clear(); ImpDefMIs.clear(); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 230a20c2a3c8..8905f757a073 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -264,7 +264,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, + true, false); } I->setFrameIdx(FrameIdx); diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5507646878cb..7fb3e6e6d2d2 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -43,35 +43,14 @@ static const char *const PSVNames[] = { // Eventually these should be uniqued on LLVMContext rather than in a managed // static. For now, we can safely use the global context for the time being to // squeak by. -PseudoSourceValue::PseudoSourceValue() : +PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : Value(Type::getInt8PtrTy(getGlobalContext()), - PseudoSourceValueVal) {} + Subclass) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { O << PSVNames[this - *PSVs]; } -namespace { - /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue - /// for holding FixedStack values, which must include a frame - /// index. - class FixedStackPseudoSourceValue : public PseudoSourceValue { - const int FI; - public: - explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {} - - virtual bool isConstant(const MachineFrameInfo *MFI) const; - - virtual bool isAliased(const MachineFrameInfo *MFI) const; - - virtual bool mayAlias(const MachineFrameInfo *) const; - - virtual void printCustom(raw_ostream &OS) const { - OS << "FixedStack" << FI; - } - }; -} - static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues; const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { @@ -130,3 +109,7 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { // Spill slots will not alias any LLVM IR value. return !MFI->isSpillSlotObjectIndex(FI); } + +void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { + OS << "FixedStack" << FI; +} diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 1957c16a2fd5..7bb020a65e7e 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -261,8 +261,8 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(),true); + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); // Assign the slot... StackSlotForVirtReg[VirtReg] = FrameIdx; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 5757e4755469..c677d341bef9 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -693,6 +693,11 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, } bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { + + // Assert that this is a valid solution to the regalloc problem. + assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() && + "Invalid (infinite cost) solution for PBQP problem."); + // Set to true if we have any spills bool anotherRoundNeeded = false; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index cf90aba86b30..94680ed29921 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -100,11 +100,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { CalleeSavedRegs.set(CSRegs[i]); } - // RS used within emit{Pro,Epi}logue() - if (mbb != MBB) { - MBB = mbb; - initRegState(); - } + MBB = mbb; + initRegState(); Tracking = false; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 1363a92fed67..6b27db263b25 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -214,7 +214,10 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) { unsigned MaxPredDepth = 0; for (SUnit::const_pred_iterator I = Cur->Preds.begin(), E = Cur->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit *PredSU = I->getSUnit(); if (PredSU->isDepthCurrent) MaxPredDepth = std::max(MaxPredDepth, @@ -248,7 +251,10 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) { unsigned MaxSuccHeight = 0; for (SUnit::const_succ_iterator I = Cur->Succs.begin(), E = Cur->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue; + if (IgnoreAntiDep && + ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) + continue; + SUnit *SuccSU = I->getSUnit(); if (SuccSU->isHeightCurrent) MaxSuccHeight = std::max(MaxSuccHeight, diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f8b219d64133..56dd53345996 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -112,12 +112,13 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, V = getUnderlyingObject(V); if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { - MayAlias = PSV->mayAlias(MFI); // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. if (PSV->isAliased(MFI)) return 0; + + MayAlias = PSV->mayAlias(MFI); return V; } @@ -127,23 +128,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI, - const MachineFrameInfo *MFI) { - if (!MI->hasOneMemOperand() || - !(*MI->memoperands_begin())->getValue() || - (*MI->memoperands_begin())->isVolatile()) - return true; - - const Value *V = (*MI->memoperands_begin())->getValue(); - if (!V) - return true; - - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - return PSV->mayAlias(MFI); - return true; -} - void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) { @@ -163,16 +147,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We build scheduling units by walking a block's instruction list from bottom // to top. - // Remember where a generic side-effecting instruction is as we procede. If - // ChainMMO is null, this is assumed to have arbitrary side-effects. If - // ChainMMO is non-null, then Chain makes only a single memory reference. - SUnit *Chain = 0; - MachineMemOperand *ChainMMO = 0; + // Remember where a generic side-effecting instruction is as we procede. + SUnit *BarrierChain = 0, *AliasChain = 0; - // Memory references to specific known memory locations are tracked so that - // they can be given more precise dependencies. - std::map<const Value *, SUnit *> MemDefs; - std::map<const Value *, std::vector<SUnit *> > MemUses; + // Memory references to specific known memory locations are tracked + // so that they can be given more precise dependencies. We track + // separately the known memory locations that may alias and those + // that are known not to alias + std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; + std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); @@ -347,114 +330,132 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (TID.isCall() || TID.hasUnmodeledSideEffects()) { - new_chain: - // This is the conservative case. Add dependencies on all memory - // references. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - Chain = SU; + if (TID.isCall() || TID.hasUnmodeledSideEffects() || + (MI->hasVolatileMemoryRef() && + (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { + // Be conservative with these and add dependencies on all memory + // references, even those that are known to not alias. + for (std::map<const Value *, SUnit *>::iterator I = + NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + for (std::map<const Value *, std::vector<SUnit *> >::iterator I = + NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + } + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); + // Add SU to the barrier chain. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain = SU; + + // fall-through + new_alias_chain: + // Chain all possibly aliasing memory references though SU. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - PendingLoads.clear(); - for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), - E = MemDefs.end(); I != E; ++I) { + for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), + E = AliasMemDefs.end(); I != E; ++I) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - I->second = SU; } for (std::map<const Value *, std::vector<SUnit *> >::iterator I = - MemUses.begin(), E = MemUses.end(); I != E; ++I) { + AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - I->second.clear(); - I->second.push_back(SU); } - // See if it is known to just have a single memory reference. - MachineInstr *ChainMI = Chain->getInstr(); - const TargetInstrDesc &ChainTID = ChainMI->getDesc(); - if (!ChainTID.isCall() && - !ChainTID.hasUnmodeledSideEffects() && - ChainMI->hasOneMemOperand() && - !(*ChainMI->memoperands_begin())->isVolatile() && - (*ChainMI->memoperands_begin())->getValue()) - // We know that the Chain accesses one specific memory location. - ChainMMO = *ChainMI->memoperands_begin(); - else - // Unknown memory accesses. Assume the worst. - ChainMMO = 0; + PendingLoads.clear(); + AliasMemDefs.clear(); + AliasMemUses.clear(); } else if (TID.mayStore()) { bool MayAlias = true; TrueMemOrderLatency = STORE_LOAD_LATENCY; if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { // A store to a specific PseudoSourceValue. Add precise dependencies. - // Handle the def in MemDefs, if there is one. - std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); - if (I != MemDefs.end()) { + // Record the def in MemDefs, first adding a dep if there is + // an existing def. + std::map<const Value *, SUnit *>::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map<const Value *, SUnit *>::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) { I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/true)); I->second = SU; } else { - MemDefs[V] = SU; + if (MayAlias) + AliasMemDefs[V] = SU; + else + NonAliasMemDefs[V] = SU; } // Handle the uses in MemUses, if there are any. std::map<const Value *, std::vector<SUnit *> >::iterator J = - MemUses.find(V); - if (J != MemUses.end()) { + ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + std::map<const Value *, std::vector<SUnit *> >::iterator JE = + ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, /*Reg=*/0, /*isNormalMemory=*/true)); J->second.clear(); } if (MayAlias) { - // Add dependencies from all the PendingLoads, since without - // memoperands we must assume they alias anything. + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); - // Add a general dependence too, if needed. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + // Add dependence on alias chain, if needed. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } + // Add dependence on barrier chain, if needed. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); } else { // Treat all other stores conservatively. - goto new_chain; + goto new_alias_chain; } } else if (TID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! - } else if (const Value *V = - getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { - // A load from a specific PseudoSourceValue. Add precise dependencies. - std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V); - if (I != MemDefs.end()) - I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, - /*isNormalMemory=*/true)); - MemUses[V].push_back(SU); - - // Add a general dependence too, if needed. - if (Chain && (!ChainMMO || - (ChainMMO->isStore() || ChainMMO->isVolatile()))) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - } else if (MI->hasVolatileMemoryRef()) { - // Treat volatile loads conservatively. Note that this includes - // cases where memoperand information is unavailable. - goto new_chain; } else { - // A "MayAlias" load. Depend on the general chain, as well as on - // all stores. In the absense of MachineMemOperand information, - // we can't even assume that the load doesn't alias well-behaved - // memory locations. - if (Chain) - Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); - for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(), - E = MemDefs.end(); I != E; ++I) { - SUnit *DefSU = I->second; - if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI)) - DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (const Value *V = + getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + // A load from a specific PseudoSourceValue. Add precise dependencies. + std::map<const Value *, SUnit *>::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map<const Value *, SUnit *>::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + if (MayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); + } else { + // A load with no underlying object. Depend on all + // potentially aliasing stores. + for (std::map<const Value *, SUnit *>::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + + PendingLoads.push_back(SU); + MayAlias = true; } - PendingLoads.push_back(SU); - } + + // Add dependencies on alias and barrier chains, if needed. + if (MayAlias && AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } } } diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index fbe40b678639..38839c44131a 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -77,6 +77,21 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, } } +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { + EVT VT = OutTys[i]; + ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + /// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5f70cb85d923..06ffdd63881f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> using namespace llvm; STATISTIC(NodesCombined , "Number of dag nodes combined"); @@ -4443,14 +4442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - // never taken branch, fold to chain - if (N1C && N1C->isNullValue()) - return Chain; - // unconditional branch - if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && @@ -4517,22 +4515,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + // Use SimplifySetCC to simplify SETCC's. SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), N->getDebugLoc(), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); - ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode()); - - // fold br_cc true, dest -> br dest (unconditional branch) - if (SCCC && !SCCC->isNullValue()) - return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, - N->getOperand(0), N->getOperand(4)); - // fold br_cc false, dest -> unconditional fall through - if (SCCC && SCCC->isNullValue()) - return N->getOperand(0); - // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 8e955aff98fe..7dbc136f3a62 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -324,82 +325,12 @@ bool FastISel::SelectCall(User *I) { unsigned IID = F->getIntrinsicID(); switch (IID) { default: break; - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None)) - setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo())); + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + // FIXME - Remove this instructions once the dust settles. return true; - } - case Intrinsic::dbg_region_start: { - DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I); - if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned ID = - DW->RecordRegionStart(RSI->getContext()); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - BuildMI(MBB, DL, II).addImm(ID); - } - return true; - } - case Intrinsic::dbg_region_end: { - DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I); - if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned ID = 0; - DISubprogram Subprogram(REI->getContext()); - if (isInlinedFnEnd(*REI, MF.getFunction())) { - // This is end of an inlined function. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordInlinedFnEnd(Subprogram); - if (ID) - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics - // or "beginning of inlined scope" is not recoginized due to - // missing location info. In such cases, ignore this region.end. - BuildMI(MBB, DL, II).addImm(ID); - } else { - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordRegionEnd(REI->getContext()); - BuildMI(MBB, DL, II).addImm(ID); - } - } - return true; - } - case Intrinsic::dbg_func_start: { - DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW - || !DW->ShouldEmitDwarfDebug()) - return true; - - if (isInlinedFnStart(*FSI, MF.getFunction())) { - // This is a beginning of an inlined function. - - // If llvm.dbg.func.start is seen in a new block before any - // llvm.dbg.stoppoint intrinsic then the location info is unknown. - // FIXME : Why DebugLoc is reset at the beginning of each block ? - DebugLoc PrevLoc = DL; - if (PrevLoc.isUnknown()) - return true; - // Record the source line. - setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); - - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(FSI->getSubprogram()); - unsigned LabelID = - DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope), - PrevLocTpl.Line, PrevLocTpl.Col); - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - BuildMI(MBB, DL, II).addImm(LabelID); - return true; - } - - // This is a beginning of a new function. - MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); - - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(FSI->getSubprogram()); - return true; - } case Intrinsic::dbg_declare: { DbgDeclareInst *DI = cast<DbgDeclareInst>(I); if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW @@ -416,11 +347,13 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - if (MMI) - MMI->setVariableDbgInfo(DI->getVariable(), FI); -#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN - DW->RecordVariable(DI->getVariable(), FI); -#endif + if (MMI) { + MetadataContext &TheMetadata = + DI->getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + MDNode *Dbg = TheMetadata.getMD(MDDbgKind, DI); + MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); + } return true; } case Intrinsic::eh_exception: { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index da311eddcae9..52b0832b0616 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -497,7 +497,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } -/// EmitNode - Generate machine code for an node and needed dependencies. +/// EmitNode - Generate machine code for a node and needed dependencies. /// void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index bb4634d04b2a..91817e4d38a4 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -97,7 +97,7 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); - /// EmitNode - Generate machine code for an node and needed dependencies. + /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f389f7f0c9df..4f0a229a505e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -148,8 +148,11 @@ private: SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); @@ -1810,10 +1813,19 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); } else if (ConstantSDNode *V = dyn_cast<ConstantSDNode>(Node->getOperand(i))) { - CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. + const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } } else { assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); - const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext()); + const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } } @@ -1909,6 +1921,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, @@ -1916,9 +1929,10 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i16: LC = Call_I16; break; - case MVT::i32: LC = Call_I32; break; - case MVT::i64: LC = Call_I64; break; + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; case MVT::i128: LC = Call_I128; break; } return ExpandLibCall(LC, Node, isSigned); @@ -2624,10 +2638,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); } else if (isSigned) { - Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32, + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128); } else { - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128); } Results.push_back(Tmp1); @@ -2643,10 +2661,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + Tmp1 = ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, RTLIB::SDIV_I64, RTLIB::SDIV_I128); else - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, RTLIB::UDIV_I64, RTLIB::UDIV_I128); Results.push_back(Tmp1); break; @@ -2691,7 +2713,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Node->getOperand(1))); break; } - Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32, + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, RTLIB::MUL_I64, RTLIB::MUL_I128); Results.push_back(Tmp1); break; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98e7317b493a..4530ffc4a2d0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1270,11 +1270,12 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, return Val; FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); - SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1378,7 +1379,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); } @@ -1394,7 +1395,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); return getFrameIndex(FrameIdx, TLI.getPointerTy()); } @@ -5814,9 +5815,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); - OS << " "; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; + if (i) OS << ", "; else OS << " "; OS << (void*)getOperand(i).getNode(); if (unsigned RN = getOperand(i).getResNo()) OS << ":" << RN; @@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, - unsigned MinSplatBits) { + unsigned MinSplatBits, + bool isBigEndian) { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); @@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned int nOps = getNumOperands(); assert(nOps > 0 && "isConstantSplat has 0-size build vector"); unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); - for (unsigned i = 0; i < nOps; ++i) { + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? nOps-1-j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = i * EltBitSize; + unsigned BitPos = j * EltBitSize; if (OpVal.getOpcode() == ISD::UNDEF) - SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize); + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index c0d2a4d39a32..90fd95eb6352 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -26,6 +26,7 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" @@ -304,7 +305,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false); } for (; BB != EB; ++BB) @@ -334,25 +335,6 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, DebugLoc DL; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (Function *F = CI->getCalledFunction()) { - switch (F->getIntrinsicID()) { - default: break; - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst *SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default)) - DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo()); - break; - } - case Intrinsic::dbg_func_start: { - DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I); - if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default)) - DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo()); - break; - } - } - } - } PN = dyn_cast<PHINode>(I); if (!PN || PN->use_empty()) continue; @@ -947,58 +929,143 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } +/// Get the EVTs and ArgFlags collections that represent the return type +/// of the given function. This does not require a DAG or a return value, and +/// is suitable for use before any DAGs for the function are constructed. +static void getReturnInfo(const Type* ReturnType, + Attributes attr, SmallVectorImpl<EVT> &OutVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags, + TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets = 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets); + unsigned NumValues = ValueVTs.size(); + if ( NumValues == 0 ) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + OutVTs.push_back(PartVT); + OutFlags.push_back(Flags); + } + } +} void SelectionDAGLowering::visitRet(ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + + if (!FLI.CanLowerReturn) { + unsigned DemoteReg = FLI.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) continue; - - SDValue RetOp = getValue(I.getOperand(i)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - const Function *F = I.getParent()->getParent(); - if (F->paramHasAttr(0, Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->paramHasAttr(0, Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + SmallVector<SDValue, 4> Chains(NumValues); + EVT PtrVT = PtrValueVTs[0]; + for (unsigned i = 0; i != NumValues; ++i) + Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr, + DAG.getConstant(Offsets[i], PtrVT)), + NULL, Offsets[i], false, 0); + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } + else { + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) continue; + + SDValue RetOp = getValue(I.getOperand(i)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } - - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); - SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, ExtendKind); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->paramHasAttr(0, Attribute::InReg)) - Flags.setInReg(); - - // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) - Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } @@ -1691,19 +1758,19 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); - const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); - size_t TSize = 0; + APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize <= 3) + if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4))) return false; APInt Range = ComputeRange(First, Last); - double Density = (double)TSize / Range.roundToDouble(); + double Density = TSize.roundToDouble() / Range.roundToDouble(); if (Density < 0.4) return false; @@ -1797,32 +1864,34 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, // Size is the number of Cases represented by this range. unsigned Size = CR.Range.second - CR.Range.first; - const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue(); - const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue(); + const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue(); + const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue(); double FMetric = 0; CaseItr Pivot = CR.Range.first + Size/2; // Select optimal pivot, maximizing sum density of LHS and RHS. This will // (heuristically) allow us to emit JumpTable's later. - size_t TSize = 0; + APInt TSize(First.getBitWidth(), 0); for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) TSize += I->size(); - size_t LSize = FrontCase.size(); - size_t RSize = TSize-LSize; + APInt LSize = FrontCase.size(); + APInt RSize = TSize-LSize; DEBUG(errs() << "Selecting best pivot: \n" << "First: " << First << ", Last: " << Last <<'\n' << "LSize: " << LSize << ", RSize: " << RSize << '\n'); for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; J!=E; ++I, ++J) { - const APInt& LEnd = cast<ConstantInt>(I->High)->getValue(); - const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue(); + const APInt &LEnd = cast<ConstantInt>(I->High)->getValue(); + const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue(); APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble(); + double LDensity = (double)LSize.roundToDouble() / + (LEnd - First + 1ULL).roundToDouble(); + double RDensity = (double)RSize.roundToDouble() / + (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place DEBUG(errs() <<"=>Step\n" @@ -3842,112 +3911,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } - case Intrinsic::dbg_stoppoint: { - DbgStopPointInst &SPI = cast<DbgStopPointInst>(I); - if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) { - MachineFunction &MF = DAG.getMachineFunction(); - DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo()); - setCurDebugLoc(Loc); - - if (OptLevel == CodeGenOpt::None) - DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(), - SPI.getLine(), - SPI.getColumn(), - SPI.getContext())); - } + case Intrinsic::dbg_stoppoint: + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + // FIXME - Remove this instructions once the dust settles. return 0; - } - case Intrinsic::dbg_region_start: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I); - if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW - && DW->ShouldEmitDwarfDebug()) { - unsigned LabelID = - DW->RecordRegionStart(RSI.getContext()); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - } - return 0; - } - case Intrinsic::dbg_region_end: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I); - - if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW - || !DW->ShouldEmitDwarfDebug()) - return 0; - - MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(REI.getContext()); - - if (isInlinedFnEnd(REI, MF.getFunction())) { - // This is end of inlined function. Debugging information for inlined - // function is not handled yet (only supported by FastISel). - if (OptLevel == CodeGenOpt::None) { - unsigned ID = DW->RecordInlinedFnEnd(Subprogram); - if (ID != 0) - // Returned ID is 0 if this is unbalanced "end of inlined - // scope". This could happen if optimizer eats dbg intrinsics or - // "beginning of inlined scope" is not recoginized due to missing - // location info. In such cases, do ignore this region.end. - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), ID)); - } - return 0; - } - - unsigned LabelID = - DW->RecordRegionEnd(REI.getContext()); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - return 0; - } - case Intrinsic::dbg_func_start: { - DwarfWriter *DW = DAG.getDwarfWriter(); - DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I); - if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None)) - return 0; - - MachineFunction &MF = DAG.getMachineFunction(); - // This is a beginning of an inlined function. - if (isInlinedFnStart(FSI, MF.getFunction())) { - if (OptLevel != CodeGenOpt::None) - // FIXME: Debugging informaation for inlined function is only - // supported at CodeGenOpt::Node. - return 0; - - DebugLoc PrevLoc = CurDebugLoc; - // If llvm.dbg.func.start is seen in a new block before any - // llvm.dbg.stoppoint intrinsic then the location info is unknown. - // FIXME : Why DebugLoc is reset at the beginning of each block ? - if (PrevLoc.isUnknown()) - return 0; - - // Record the source line. - setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - - if (!DW || !DW->ShouldEmitDwarfDebug()) - return 0; - DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(FSI.getSubprogram()); - DICompileUnit CU(PrevLocTpl.Scope); - unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, - PrevLocTpl.Line, - PrevLocTpl.Col); - DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), - getRoot(), LabelID)); - return 0; - } - - // This is a beginning of a new function. - MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - - if (!DW || !DW->ShouldEmitDwarfDebug()) - return 0; - // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(FSI.getSubprogram()); - return 0; - } case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. @@ -3972,13 +3941,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. int FI = SI->second; -#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - if (MMI) - MMI->setVariableDbgInfo(Variable, FI); -#else - DW->RecordVariable(Variable, FI); -#endif + if (MMI) { + MetadataContext &TheMetadata = + DI.getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI); + MMI->setVariableDbgInfo(Variable, FI, Dbg); + } return 0; } case Intrinsic::eh_exception: { @@ -4233,7 +4204,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); if (CI->getZExtValue() < 2) - setValue(&I, DAG.getConstant(-1U, Ty)); + setValue(&I, DAG.getConstant(-1ULL, Ty)); else setValue(&I, DAG.getConstant(0, Ty)); return 0; @@ -4355,6 +4326,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX); case Intrinsic::atomic_swap: return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP); + + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + return 0; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + return 0; } } @@ -4368,7 +4349,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { /// TargetLowering::IsEligibleForTailCallOptimization. /// static bool -isInTailCallPosition(const Instruction *I, Attributes RetAttr, +isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, const TargetLowering &TLI) { const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -4395,9 +4376,14 @@ isInTailCallPosition(const Instruction *I, Attributes RetAttr, // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + // Conservatively require the attributes of the call to match those of - // the return. - if (F->getAttributes().getRetAttributes() != RetAttr) + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -4431,15 +4417,52 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, MachineBasicBlock *LandingPad) { const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + const Type *RetTy = FTy->getReturnType(); MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); unsigned BeginLabel = 0, EndLabel = 0; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - unsigned j = 1; + + // Check whether the function can return without sret-demotion. + SmallVector<EVT, 4> OutVTs; + SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + SmallVector<uint64_t, 4> Offsets; + getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + OutVTs, OutsFlags, TLI, &Offsets); + + + bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), + FTy->isVarArg(), OutVTs, OutsFlags, DAG); + + SDValue DemoteStackSlot; + + if (!CanLowerReturn) { + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( + FTy->getReturnType()); + unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( + FTy->getReturnType()); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); + + DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.Alignment = Align; + Args.push_back(Entry); + RetTy = Type::getVoidTy(FTy->getContext()); + } + for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i, ++j) { + i != e; ++i) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4475,7 +4498,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, isTailCall = false; std::pair<SDValue,SDValue> Result = - TLI.LowerCallTo(getRoot(), CS.getType(), + TLI.LowerCallTo(getRoot(), RetTy, CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), @@ -4489,6 +4512,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, "Null value expected with tail call!"); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); + else if (!CanLowerReturn && Result.second.getNode()) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); + + ComputeValueVTs(TLI, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + unsigned NumValues = OutVTs.size(); + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, + DAG.getConstant(Offsets[i], PtrVT)), + NULL, Offsets[i], false, 1); + Values[i] = L; + Chains[i] = L.getValue(1); + } + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + PendingLoads.push_back(Chain); + + setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, + getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues), + &Values[0], NumValues)); + } // As a special case, a null chain means that a tail call has // been emitted and the DAG root is already updated. if (Result.second.getNode()) @@ -5229,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), OpInfo.CallOperand, StackSlot, NULL, 0); @@ -5757,9 +5809,32 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDL->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); + SmallVector<ISD::InputArg, 16> Ins; + + // Check whether the function can return without sret-demotion. + SmallVector<EVT, 4> OutVTs; + SmallVector<ISD::ArgFlagsTy, 4> OutsFlags; + getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + OutVTs, OutsFlags, TLI); + FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + + FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), + OutVTs, OutsFlags, DAG); + if (!FLI.CanLowerReturn) { + // Put in an sret pointer parameter before all the other parameters. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + + // NOTE: Assuming that a pointer will never break down to more than one VT + // or one register. + ISD::ArgFlagsTy Flags; + Flags.setSRet(); + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, true); + Ins.push_back(RetArg); + } // Set up the incoming argument description vector. - SmallVector<ISD::InputArg, 16> Ins; unsigned Idx = 1; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { @@ -5837,6 +5912,28 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; + if (!FLI.CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + EVT VT = ValueVTs[0]; + EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, + VT, AssertOp); + + MachineFunction& MF = SDL->DAG.getMachineFunction(); + MachineRegisterInfo& RegInfo = MF.getRegInfo(); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + FLI.DemoteRegister = SRetReg; + NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue); + DAG.setRoot(NewRoot); + + // i indexes lowered arguments. Bump it past the hidden sret argument. + // Idx indexes LLVM arguments. Don't touch it. + ++i; + } for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index a0ec7aabd8a7..10f256c15306 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -90,6 +90,14 @@ public: MachineFunction *MF; MachineRegisterInfo *RegInfo; + /// CanLowerReturn - true iff the function's return value can be lowered to + /// registers. + bool CanLowerReturn; + + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg + /// allocated to hold a pointer to the hidden sret parameter. + unsigned DemoteRegister; + explicit FunctionLoweringInfo(TargetLowering &TLI); /// set - Initialize this FunctionLoweringInfo with the given Function @@ -193,9 +201,9 @@ class SelectionDAGLowering { Case() : Low(0), High(0), BB(0) { } Case(Constant* low, Constant* high, MachineBasicBlock* bb) : Low(low), High(high), BB(bb) { } - uint64_t size() const { - uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue(); - uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue(); + APInt size() const { + const APInt &rHigh = cast<ConstantInt>(High)->getValue(); + const APInt &rLow = cast<ConstantInt>(Low)->getValue(); return (rHigh - rLow + 1ULL); } }; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b63d5bb2e6a1..ab5f21e4337c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -68,7 +68,7 @@ static cl::opt<bool> EnableFastISelAbort("fast-isel-abort", cl::Hidden, cl::desc("Enable abort calls when \"fast\" instruction fails")); static cl::opt<bool> -SchedLiveInCopies("schedule-livein-copies", +SchedLiveInCopies("schedule-livein-copies", cl::Hidden, cl::desc("Schedule copies of livein registers"), cl::init(false)); @@ -387,13 +387,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, if (MDDbgKind) { // Update DebugLoc if debug information is attached with this // instruction. - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDL->setCurDebugLoc(Loc); - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(Loc); - } + if (!isa<DbgInfoIntrinsic>(I)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + SDL->setCurDebugLoc(Loc); + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } } if (!isa<TerminatorInst>(I)) SDL->visit(*I); @@ -750,14 +751,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (MDDbgKind) { // Update DebugLoc if debug information is attached with this // instruction. - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, - MF.getDebugLocInfo()); - FastIS->setCurDebugLoc(Loc); - if (MF.getDefaultDebugLoc().isUnknown()) - MF.setDefaultDebugLoc(Loc); - } + if (!isa<DbgInfoIntrinsic>(BI)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, + MF.getDebugLocInfo()); + FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); + } } // Just before the terminator instruction, insert instructions to diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9f36b679f3b0..2ca52a48c2a9 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -22,7 +22,6 @@ #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -65,22 +64,27 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::SRA_I32] = "__ashrsi3"; Names[RTLIB::SRA_I64] = "__ashrdi3"; Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; Names[RTLIB::MUL_I16] = "__mulhi3"; Names[RTLIB::MUL_I32] = "__mulsi3"; Names[RTLIB::MUL_I64] = "__muldi3"; Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::SDIV_I8] = "__divqi3"; Names[RTLIB::SDIV_I16] = "__divhi3"; Names[RTLIB::SDIV_I32] = "__divsi3"; Names[RTLIB::SDIV_I64] = "__divdi3"; Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; Names[RTLIB::UDIV_I16] = "__udivhi3"; Names[RTLIB::UDIV_I32] = "__udivsi3"; Names[RTLIB::UDIV_I64] = "__udivdi3"; Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; Names[RTLIB::SREM_I16] = "__modhi3"; Names[RTLIB::SREM_I32] = "__modsi3"; Names[RTLIB::SREM_I64] = "__moddi3"; Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; Names[RTLIB::UREM_I16] = "__umodhi3"; Names[RTLIB::UREM_I32] = "__umodsi3"; Names[RTLIB::UREM_I64] = "__umoddi3"; @@ -2360,7 +2364,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. - std::string RegName(Constraint.begin()+1, Constraint.end()-1); + StringRef RegName(Constraint.data()+1, Constraint.size()-2); // Figure out which register class contains this reg. const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -2383,7 +2387,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (StringsEqualNoCase(RegName, RI->getName(*I))) + if (RegName.equals_lower(RI->getName(*I))) return std::make_pair(*I, RC); } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index b5d6b471f472..3909c56bdbb2 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -709,7 +709,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index f3ad0d1cc0a7..f85384bc5c77 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -13,15 +13,43 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ManagedStatic.h" using namespace llvm; -std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry, - IndexListEntry::tombstoneKeyEntry; + +// Yep - these are thread safe. See the header for details. +namespace { + + + class EmptyIndexListEntry : public IndexListEntry { + public: + EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {} + }; + + class TombstoneIndexListEntry : public IndexListEntry { + public: + TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {} + }; + + // The following statics are thread safe. They're read only, and you + // can't step from them to any other list entries. + ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey; + ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey; +} char SlotIndexes::ID = 0; static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering"); +IndexListEntry* IndexListEntry::getEmptyKeyEntry() { + return &*IndexListEntryEmptyKey; +} + +IndexListEntry* IndexListEntry::getTombstoneKeyEntry() { + return &*IndexListEntryTombstoneKey; +} + + void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(au); @@ -51,8 +79,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { mf = &fn; initList(); - const unsigned gap = 1; - // Check that the list contains only the sentinal. assert(indexListHead->getNext() == 0 && "Index list non-empty at initial numbering?"); @@ -64,14 +90,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { "MachineInstr -> Index mapping non-empty at initial numbering?"); functionSize = 0; - /* - for (unsigned s = 0; s < SlotIndex::NUM; ++s) { - indexList.push_back(createEntry(0, s)); - } - - unsigned index = gap * SlotIndex::NUM; - */ - unsigned index = 0; // Iterate over the the function. @@ -83,7 +101,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); SlotIndex blockStartIndex(back(), SlotIndex::LOAD); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -93,7 +111,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index)); terminatorGaps.insert( std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; } // Insert a store index for the instr. @@ -109,14 +127,14 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { if (Slots == 0) Slots = 1; - index += (Slots + 1) * gap * SlotIndex::NUM; + index += (Slots + 1) * SlotIndex::NUM; } if (mbb->getFirstTerminator() == mbb->end()) { push_back(createEntry(0, index)); terminatorGaps.insert( std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += gap * SlotIndex::NUM; + index += SlotIndex::NUM; } SlotIndex blockEndIndex(back(), SlotIndex::STORE); @@ -138,21 +156,36 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { return false; } -void SlotIndexes::renumber() { - assert(false && "SlotIndexes::runmuber is not fully implemented yet."); +void SlotIndexes::renumberIndexes() { - // Compute numbering as follows: - // Grab an iterator to the start of the index list. - // Iterate over all MBBs, and within each MBB all MIs, keeping the MI - // iterator in lock-step (though skipping it over indexes which have - // null pointers in the instruction field). - // At each iteration assert that the instruction pointed to in the index - // is the same one pointed to by the MI iterator. This + // Renumber updates the index of every element of the index list. + // If all instrs in the function have been allocated an index (which has been + // placed in the index list in the order of instruction iteration) then the + // resulting numbering will match what would have been generated by the + // pass during the initial numbering of the function if the new instructions + // had been present. - // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should - // only need to be set up once - when the first numbering is computed. + functionSize = 0; + unsigned index = 0; + + for (IndexListEntry *curEntry = front(); curEntry != getTail(); + curEntry = curEntry->getNext()) { - assert(false && "Renumbering not supported yet."); + curEntry->setIndex(index); + + if (curEntry->getInstr() == 0) { + // MBB start entry or terminator gap. Just step index by 1. + index += SlotIndex::NUM; + } + else { + ++functionSize; + unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); + if (Slots == 0) + Slots = 1; + + index += (Slots + 1) * SlotIndex::NUM; + } + } } void SlotIndexes::dump() const { @@ -167,7 +200,7 @@ void SlotIndexes::dump() const { } } - for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin(); + for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); itr != mbb2IdxMap.end(); ++itr) { errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" << itr->second.first << ", " << itr->second.second << "]\n"; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 95e85be5b817..910732538e97 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -52,16 +52,16 @@ protected: /// Ensures there is space before the given machine instruction, returns the /// instruction's new number. SlotIndex makeSpaceBefore(MachineInstr *mi) { - if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { + //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { // FIXME: Should be updated to use rewrite-in-place methods when they're // introduced. Currently broken. //lis->scaleNumbering(2); //ls->scaleNumbering(2); - } + //} SlotIndex miIdx = lis->getInstructionIndex(mi); - assert(lis->hasGapBeforeInstr(miIdx)); + //assert(lis->hasGapBeforeInstr(miIdx)); return miIdx; } @@ -69,16 +69,16 @@ protected: /// Ensure there is space after the given machine instruction, returns the /// instruction's new number. SlotIndex makeSpaceAfter(MachineInstr *mi) { - if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { + //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { // FIXME: Should be updated to use rewrite-in-place methods when they're // introduced. Currently broken. // lis->scaleNumbering(2); // ls->scaleNumbering(2); - } + //} SlotIndex miIdx = lis->getInstructionIndex(mi); - assert(lis->hasGapAfterInstr(miIdx)); + //assert(lis->hasGapAfterInstr(miIdx)); return miIdx; } @@ -99,14 +99,8 @@ protected: true, ss, trc); MachineBasicBlock::iterator storeInstItr(next(mi)); MachineInstr *storeInst = &*storeInstItr; - SlotIndex storeInstIdx = miIdx.getNextIndex(); - - assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(storeInst, storeInstIdx); - - return storeInstIdx; + return lis->InsertMachineInstrInMaps(storeInst); } /// Insert a store of the given vreg to the given stack slot immediately @@ -120,14 +114,8 @@ protected: tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(prior(mi)); MachineInstr *storeInst = &*storeInstItr; - SlotIndex storeInstIdx = miIdx.getPrevIndex(); - - assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(storeInst, storeInstIdx); - - return storeInstIdx; + return lis->InsertMachineInstrInMaps(storeInst); } void insertStoreAfterInstOnInterval(LiveInterval *li, @@ -164,14 +152,8 @@ protected: tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(next(mi)); MachineInstr *loadInst = &*loadInstItr; - SlotIndex loadInstIdx = miIdx.getNextIndex(); - - assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && - "Store inst index already in use."); - lis->InsertMachineInstrInMaps(loadInst, loadInstIdx); - - return loadInstIdx; + return lis->InsertMachineInstrInMaps(loadInst); } /// Insert a load of the given vreg from the given stack slot immediately @@ -186,14 +168,8 @@ protected: tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(prior(mi)); MachineInstr *loadInst = &*loadInstItr; - SlotIndex loadInstIdx = miIdx.getPrevIndex(); - - assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && - "Load inst index already in use."); - - lis->InsertMachineInstrInMaps(loadInst, loadInstIdx); - return loadInstIdx; + return lis->InsertMachineInstrInMaps(loadInst); } void insertLoadBeforeInstOnInterval(LiveInterval *li, diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index c646869e8a73..102e2a34a97e 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -135,14 +135,52 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MachineOperand &MO = MI->getOperand(0); - MO.setReg(DestReg); - MO.setSubReg(SubIdx); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + MO.setReg(DestReg); + MO.setSubReg(SubIdx); + } else if (SubIdx) { + MO.setReg(TRI->getSubReg(DestReg, SubIdx)); + } else { + MO.setReg(DestReg); + } MBB.insert(I, MI); } +bool +TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, + const MachineInstr *Other, + const MachineRegisterInfo *MRI) const { + if (MI->getOpcode() != Other->getOpcode() || + MI->getNumOperands() != Other->getNumOperands()) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + if (MO.isReg() && MO.isDef()) { + assert(OMO.isReg() && OMO.isDef()); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Reg != OMO.getReg()) + return false; + } else if (MRI->getRegClass(MO.getReg()) != + MRI->getRegClass(OMO.getReg())) + return false; + + continue; + } + + if (!MO.isIdenticalTo(OMO)) + return false; + } + + return true; +} + unsigned TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { unsigned FnSize = 0; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 0a6a0d745496..84467ed36d54 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1033,7 +1033,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); ReMatRegs.set(regB); ++NumReMats; } else { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index ce3eed17c723..c8c5d861578d 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -117,8 +117,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(), /*isSS*/true); + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) @@ -161,8 +161,8 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { EmergencySpillSlots.find(RC); if (I != EmergencySpillSlots.end()) return I->second; - int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(), - RC->getAlignment(), /*isSS*/true); + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); if (LowSpillSlot == NO_STACK_SLOT) LowSpillSlot = SS; if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index fd80f460992b..ec0abd137d6d 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI, } /// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since it's spill instruction is removed), mark it isDead. Also checks if +/// (since its spill instruction is removed), mark it isDead. Also checks if /// the def MI has other definition operands that are not dead. Returns it by /// reference. static bool InvalidateRegDef(MachineBasicBlock::iterator I, MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef) { + bool &HasLiveDef, + const TargetRegisterInfo *TRI) { // Due to remat, it's possible this reg isn't being reused. That is, // the def of this reg (by prev MI) is now dead. MachineInstr *DefMI = I; MachineOperand *DefOp = NULL; for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) + if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) continue; if (MO.getReg() == Reg) DefOp = &MO; @@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I, MachineInstr *NMI = I; for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() != Reg) + if (!MO.isReg() || MO.getReg() == 0 || + (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) continue; if (MO.isUse()) FoundUse = true; @@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, KillOps[*SR] = NULL; RegKills.reset(*SR); } - - if (!MI.isRegTiedToDefOperand(i)) - // Unless it's a two-address operand, this is the new kill. - MO.setIsKill(); + } else { + // Check for subreg kills as well. + // d4 = + // store d4, fi#0 + // ... + // = s8<kill> + // ... + // = d4 <avoiding reload> + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + unsigned SReg = *SR; + if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) { + KillOps[SReg]->setIsKill(false); + unsigned KReg = KillOps[SReg]->getReg(); + KillOps[KReg] = NULL; + RegKills.reset(KReg); + + for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) { + KillOps[*SSR] = NULL; + RegKills.reset(*SSR); + } + } + } } + if (MO.isKill()) { RegKills.set(Reg); KillOps[Reg] = &MO; @@ -573,7 +594,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); RegKills.reset(Reg); @@ -583,6 +604,10 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, RegKills.reset(*SR); KillOps[*SR] = NULL; } + for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { + RegKills.reset(*SR); + KillOps[*SR] = NULL; + } } } @@ -601,7 +626,7 @@ static void ReMaterialize(MachineBasicBlock &MBB, "Don't know how to remat instructions that define > 1 values!"); #endif TII->reMaterialize(MBB, MII, DestReg, - ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI); + ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -816,11 +841,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, "A reuse cannot be a virtual register"); if (PRRU != RealPhysRegUsed) { // What was the sub-register index? - unsigned SubReg; - for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++) - if (SubReg == RealPhysRegUsed) - break; - assert(SubReg == RealPhysRegUsed && + SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); + assert(SubIdx && "Operand physreg is not a sub-register of PhysRegUsed"); } @@ -1454,7 +1476,7 @@ private: // being reused. for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) { + if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { MachineInstr *DeadDef = PrevMII; if (ReMatDefs.count(DeadDef) && !HasOtherDef) { // FIXME: This assumes a remat def does not have side effects. @@ -1704,6 +1726,7 @@ private: // Mark is killed. MachineInstr *CopyMI = prior(InsertLoc); + CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -1984,6 +2007,7 @@ private: TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); MachineInstr *CopyMI = prior(InsertLoc); + CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -2112,6 +2136,7 @@ private: // virtual or needing to clobber any values if it's physical). NextMII = &MI; --NextMII; // backtrack to the copy. + NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse); // Propagate the sub-register index over. if (SubIdx) { DefMO = NextMII->findRegisterDefOperand(DestReg); |