diff options
Diffstat (limited to 'lib/CodeGen')
96 files changed, 7934 insertions, 4813 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index b520d8fcedc0..5c809f7fd668 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -357,7 +357,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just - // after the def. A dead def can occur because the def is truely + // after the def. A dead def can occur because the def is truly // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 36638c36de67..125e64196f15 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -211,7 +211,6 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); - const Function *F = ExitBB->getParent(); // The block must end in a return statement or unreachable. // @@ -250,6 +249,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. + const Function *F = ExitBB->getParent(); unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp new file mode 100644 index 000000000000..0db28a636ad8 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -0,0 +1,87 @@ +//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +ARMException::ARMException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + {} + +ARMException::~ARMException() {} + +void ARMException::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void ARMException::BeginFunction(const MachineFunction *MF) { + Asm->OutStreamer.EmitFnStart(); + if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void ARMException::EndFunction() { + if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory) + Asm->OutStreamer.EmitCantUnwind(); + else { + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer.EmitPersonality(PerSym); + } + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + Asm->OutStreamer.EmitHandlerData(); + + // Emit actual exception table + EmitExceptionTable(); + } + + Asm->OutStreamer.EmitFnEnd(); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9cb882e6a1bb..8116f8d5925f 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,10 +33,12 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" @@ -70,17 +72,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) NumBits = TD.getPreferredAlignmentLog(GVar); - + // If InBits is specified, round it to it. if (InBits > NumBits) NumBits = InBits; - + // If the GV has a specified alignment, take it into account. if (GV->getAlignment() == 0) return NumBits; - + unsigned GVAlign = Log2_32(GV->getAlignment()); - + // If the GVAlign is larger than NumBits, or if we are required to obey // NumBits because the GV has an assigned section, obey it. if (GVAlign > NumBits || GV->hasSection()) @@ -104,16 +106,16 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) AsmPrinter::~AsmPrinter() { assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); - + if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - + for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) delete I->second; delete &GCMap; GCMetadataPrinters = 0; } - + delete &OutStreamer; } @@ -156,9 +158,9 @@ bool AsmPrinter::doInitialization(Module &M) { // Initialize TargetLoweringObjectFile. const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - + Mang = new Mangler(OutContext, *TM.getTargetData()); - + // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -196,6 +198,9 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::DwarfCFI: DE = new DwarfCFIException(this); break; + case ExceptionHandling::ARM: + DE = new ARMException(this); + break; } return false; @@ -250,56 +255,52 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - if (!GV->hasInitializer()) // External globals require no code. - return; - - // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(GV)) - return; + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; + if (isVerbose()) { + WriteAsOperand(OutStreamer.GetCommentOS(), GV, + /*PrintType=*/false, GV->getParent()); + OutStreamer.GetCommentOS() << '\n'; + } } - + MCSymbol *GVSym = Mang->getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility()); + if (!GV->hasInitializer()) // External globals require no extra code. + return; + if (MAI->hasDotTypeDotSizeDirective()) OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); const TargetData *TD = TM.getTargetData(); uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - + // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *TD); - + // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } - + // Handle common symbols. if (GVKind.isCommon()) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { const MCSection *TheSection = @@ -308,7 +309,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + if (MAI->hasLCOMMDirective()) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size); @@ -318,14 +319,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .local _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + const MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); @@ -333,14 +334,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - + // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. @@ -353,31 +354,31 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // specific code. if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol - MCSymbol *MangSym = + MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - + if (GVKind.isThreadBSS()) OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitAlignment(AlignLog, GV); + EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); - + EmitGlobalConstant(GV->getInitializer()); } - + OutStreamer.AddBlankLine(); - + // Emit the variable struct for the runtime. - const MCSection *TLVSect + const MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - + OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. EmitLinkage(GV->getLinkage(), GVSym); OutStreamer.EmitLabel(GVSym); - + // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime @@ -387,7 +388,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); - + OutStreamer.AddBlankLine(); return; } @@ -404,7 +405,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); - + OutStreamer.AddBlankLine(); } @@ -413,7 +414,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { void AsmPrinter::EmitFunctionHeader() { // Print out constants referenced by the function EmitConstantPool(); - + // Print the 'header' of function. const Function *F = MF->getFunction(); @@ -435,7 +436,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. EmitFunctionEntryLabel(); - + // If the function had address-taken blocks that got deleted, then we have // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. @@ -445,17 +446,17 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.AddComment("Address taken block that was later removed"); OutStreamer.EmitLabel(DeadBlockSyms[i]); } - + // Add some workaround for linkonce linkage on Cygwin\MinGW. if (MAI->getLinkOnceDirective() != 0 && (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { // FIXME: What is this? - MCSymbol *FakeStub = + MCSymbol *FakeStub = OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ CurrentFnSym->getName()); OutStreamer.EmitLabel(FakeStub); } - + // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -480,44 +481,16 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << "[ "; - EmitDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } -} - /// EmitComments - Pretty-print comments for instructions. static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); - if (!DL.isUnknown()) { // Print source line info. - EmitDebugLoc(DL, MF, CommentOS); - CommentOS << '\n'; - } - + // Check for spills and reloads int FI; - + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); - + // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; @@ -538,7 +511,7 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } - + // Check for spill-induced copies if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) CommentOS << " Reload Reuse\n"; @@ -612,21 +585,61 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); } - + OS << '+' << MI->getOperand(1).getImm(); // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } +bool AsmPrinter::needsCFIMoves() { + if (UnwindTablesMandatory) + return true; + + if (MMI->hasDebugInfo() && !MAI->doesDwarfRequireFrameSection()) + return true; + + if (MF->getFunction()->doesNotThrow()) + return false; + + return true; +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->doesDwarfRequireFrameSection() || + MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + OutStreamer.EmitLabel(Label); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (!needsCFIMoves()) + return; + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector<MachineMove>::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); - + // Print out code for the function. bool HasAnyRealCode = false; const MachineInstr *LastMI = 0; @@ -649,12 +662,15 @@ void AsmPrinter::EmitFunctionBody() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginInstruction(II); } - + if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -675,10 +691,13 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) EmitKill(II, *this); break; default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + EmitInstruction(II); break; } - + if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endInstruction(II); @@ -705,10 +724,10 @@ void AsmPrinter::EmitFunctionBody() { } else // Target not mc-ized yet. OutStreamer.EmitRawText(StringRef("\tnop\n")); } - + // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { @@ -716,14 +735,14 @@ void AsmPrinter::EmitFunctionBody() { // difference between the function label and the temp label. MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(FnEndLabel); - + const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), MCSymbolRefExpr::Create(CurrentFnSym, OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - + // Emit post-function debug information. if (DD) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -734,20 +753,71 @@ void AsmPrinter::EmitFunctionBody() { DE->EndFunction(); } MMI->EndFunction(); - + // Print out jump tables referenced by the function. EmitJumpTableInfo(); - + OutStreamer.AddBlankLine(); } /// getDebugValueLocation - Get location information encoded by DBG_VALUE /// operands. -MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { +MachineLocation AsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { // Target specific DBG_VALUE instructions are handled by each target. return MachineLocation(); } +/// getDwarfRegOpSize - get size required to emit given machine location using +/// dwarf encoding. +unsigned AsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + // If the value is at a certain offset from frame register then + // use DW_OP_breg. + if (DWReg < 32) + return 1 + MCAsmInfo::getSLEB128Size(Offset); + else + return 1 + MCAsmInfo::getULEB128Size(MLoc.getReg()) + + MCAsmInfo::getSLEB128Size(Offset); + } + if (DWReg < 32) + return 1; + + return 1 + MCAsmInfo::getULEB128Size(DWReg); +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } +} + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -782,7 +852,7 @@ bool AsmPrinter::doFinalization(Module &M) { } delete DD; DD = 0; } - + // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references @@ -796,7 +866,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); } - + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); @@ -822,7 +892,7 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, I->getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, + OutStreamer.EmitAssignment(Name, MCSymbolRefExpr::Create(Target, OutContext)); } } @@ -839,14 +909,14 @@ bool AsmPrinter::doFinalization(Module &M) { if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); - + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. EmitEndOfAsmFile(M); - + delete Mang; Mang = 0; MMI = 0; - + OutStreamer.Finish(); return false; } @@ -886,7 +956,7 @@ void AsmPrinter::EmitConstantPool() { for (unsigned i = 0, e = CP.size(); i != e; ++i) { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - + SectionKind Kind; switch (CPE.getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); @@ -904,7 +974,7 @@ void AsmPrinter::EmitConstantPool() { } const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); - + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -953,7 +1023,7 @@ void AsmPrinter::EmitConstantPool() { } /// EmitJumpTableInfo - Print assembly representations of the jump tables used -/// by the current function to the current output stream. +/// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -962,7 +1032,7 @@ void AsmPrinter::EmitJumpTableInfo() { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; - // Pick the directive to use to print the jump table entries, and switch to + // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); bool JTInDiffSection = false; @@ -978,18 +1048,18 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); } else { // Otherwise, drop it in the readonly section. - const MCSection *ReadOnlySection = + const MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); - + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - - // If this jump table was deleted, ignore it. + + // If this jump table was deleted, ignore it. if (JTBBs.empty()) continue; // For the EK_LabelDifference32 entry, if the target supports .set, emit a @@ -1003,15 +1073,15 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; if (!EmittedSets.insert(MBB)) continue; - + // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), MCBinaryExpr::CreateSub(LHS, Base, OutContext)); } - } - + } + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The @@ -1064,8 +1134,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If the .set directive is supported, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - - // If we have emitted set directives for the jump table entries, print + + // If we have emitted set directives for the jump table entries, print // them rather than the entries themselves. If we're emitting PIC, then // emit the table entries as differences between two text section labels. if (MAI->hasSetDirective()) { @@ -1081,9 +1151,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } } - + assert(Value && "Unknown entry kind!"); - + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -1103,18 +1173,18 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; - + if (!GV->hasAppendingLinkage()) return false; assert(GV->hasInitializer() && "Not a special LLVM global!"); - + const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); - + if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".constructors_used"); @@ -1122,8 +1192,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { MCSA_Reference); } return true; - } - + } + if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align); @@ -1137,7 +1207,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } return true; } - + return false; } @@ -1148,7 +1218,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast<ConstantArray>(List); if (InitList == 0) return; - + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); @@ -1157,7 +1227,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { } } -/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the /// function pointers, ignoring the init priority. void AsmPrinter::EmitXXStructorList(Constant *List) { // Should be an array of '{ int, void ()* }' structs. The first value is the @@ -1203,11 +1273,11 @@ void AsmPrinter::EmitInt32(int Value) const { void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { // Get the Hi-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - + if (!MAI->hasSetDirective()) { OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); return; @@ -1219,27 +1289,27 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } -/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) + const MCSymbol *Lo, unsigned Size) const { - + // Emit Hi+Offset - Lo // Get the Hi+Offset expression. const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(Plus, MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - - if (!MAI->hasSetDirective()) + + if (!MAI->hasSetDirective()) OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); else { // Otherwise, emit with .set (aka assignment). @@ -1249,22 +1319,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, } } -/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size) const { - + // Emit Label+Offset const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); } - + //===----------------------------------------------------------------------===// @@ -1276,9 +1346,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. - + if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); else @@ -1293,25 +1363,25 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; - + if (CV->isNullValue() || isa<UndefValue>(CV)) return MCConstantExpr::Create(0, Ctx); if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) return MCConstantExpr::Create(CI->getZExtValue(), Ctx); - + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); - + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); return MCConstantExpr::Create(0, Ctx); } - + switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1339,21 +1409,21 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0], IdxVec.size()); - + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; - + // Truncate/sext the offset to the pointer size. if (TD.getPointerSizeInBits() != 64) { int SExtAmount = 64-TD.getPointerSizeInBits(); Offset = (Offset << SExtAmount) >> SExtAmount; } - + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } - + case Instruction::Trunc: // We emit the value and depend on the assembler to truncate the generated // expression properly. This is important for differences between @@ -1372,7 +1442,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { false/*ZExt*/); return LowerConstant(Op, AP); } - + case Instruction::PtrToInt: { const TargetData &TD = *AP.TM.getTargetData(); // Support only foldable casts to/from pointers that can be eliminated by @@ -1394,7 +1464,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: @@ -1435,7 +1505,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } - + // Otherwise, it can be emitted as .ascii. SmallVector<char, 128> TmpVec; TmpVec.reserve(CA->getNumOperands()); @@ -1493,7 +1563,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); return; } - + if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); @@ -1503,7 +1573,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } - + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1518,7 +1588,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= " << DoubleVal.convertToDouble() << '\n'; } - + if (AP.TM.getTargetData()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); @@ -1526,14 +1596,14 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); } - + // Emit the tail padding for the long double. const TargetData &TD = *AP.TM.getTargetData(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - TD.getTypeStoreSize(CFP->getType()), AddrSpace); return; } - + assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); // All long double variants are printed as hex @@ -1588,10 +1658,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return; } } - + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) return EmitGlobalConstantArray(CVA, AddrSpace, AP); - + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); @@ -1603,10 +1673,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } - + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. AP.OutStreamer.EmitValue(LowerConstant(CV, AP), @@ -1703,7 +1773,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); return OutContext.GetOrCreateSymbol(NameStr.str()); -} +} @@ -1740,10 +1810,10 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, // Add loop depth information const MachineLoop *Loop = LI->getLoopFor(&MBB); if (Loop == 0) return; - + MachineBasicBlock *Header = Loop->getHeader(); assert(Header && "No header for loop"); - + // If this block is not a loop header, just print out what is the loop header // and return. if (Header != &MBB) { @@ -1753,21 +1823,21 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, " Depth="+Twine(Loop->getLoopDepth())); return; } - + // Otherwise, it is a loop header. Print out information about child and // parent loops. raw_ostream &OS = AP.OutStreamer.GetCommentOS(); - - PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); - + + PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); + OS << "=>"; OS.indent(Loop->getLoopDepth()*2-2); - + OS << "This "; if (Loop->empty()) OS << "Inner "; OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; - + PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } @@ -1788,7 +1858,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { const BasicBlock *BB = MBB->getBasicBlock(); if (isVerbose()) OutStreamer.AddComment("Block address taken"); - + std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB); for (unsigned i = 0, e = Syms.size(); i != e; ++i) @@ -1801,9 +1871,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - + EmitBasicBlockLoopComments(*MBB, LI, *this); - + // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); @@ -1823,7 +1893,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; - + switch (Visibility) { default: break; case GlobalValue::HiddenVisibility: @@ -1850,23 +1920,23 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) return false; - + // If there isn't exactly one predecessor, it can't be a fall through. MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; - + // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *PI; - + if (!Pred->isLayoutSuccessor(MBB)) return false; - + // If the block is completely empty, then it definitely does fall through. if (Pred->empty()) return true; - + // Otherwise, check the last instruction. const MachineInstr &LastInst = Pred->back(); return !LastInst.getDesc().isBarrier(); @@ -1882,9 +1952,9 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { gcp_map_type::iterator GCPI = GCMap.find(S); if (GCPI != GCMap.end()) return GCPI->second; - + const char *Name = S->getName().c_str(); - + for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) @@ -1894,7 +1964,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { GCMap.insert(std::make_pair(S, GMP)); return GMP; } - + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 98a1bf2f1ce4..6d1708a2ce80 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } @@ -277,42 +277,42 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, } } -/// EmitFrameMoves - Emit frame instructions to describe the layout of the -/// frame. -void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { +/// EmitFrameMoves - Emit a frame instruction. +void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameLowering()->getStackGrowthDirection() != - TargetFrameLowering::StackGrowsUp) - stackGrowth *= -1; + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + assert("Machine move not supported yet"); + // Reg + Offset + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } +} + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { for (unsigned i = 0, N = Moves.size(); i < N; ++i) { const MachineMove &Move = Moves[i]; MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); - - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - assert("Machine move not supported yet"); - // Reg + Offset - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); - } + EmitCFIFrameMove(Move); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 306efade7d92..d2be5525d7d0 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMAsmPrinter + ARMException.cpp AsmPrinter.cpp AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp DwarfTableException.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index d56c0947795e..7d61f1edff4a 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -70,7 +70,6 @@ namespace llvm { public: DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} - virtual ~DIEAbbrev() {} // Accessors. unsigned getTag() const { return Tag; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 68be2eed8f0e..dbd52c4981b3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -40,7 +41,7 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -51,81 +52,85 @@ void DwarfCFIException::EndModule() { if (!Asm->MAI->isExceptionHandlingDwarf()) return; - if (!shouldEmitTableModule) - return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; // Emit references to all used personality functions + bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); - Asm->EmitReference(Personalities[i], PerEncoding); + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. This lets us produce bit identical outputs while + // transitioning to CFI. + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); } } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfCFIException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MMI->getLandingPads().empty(); // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + shouldEmitMoves = Asm->needsCFIMoves(); - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - shouldEmitTableModule |= shouldEmitTable; + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; - if (shouldEmitMoves) { - const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); - Asm->OutStreamer.EmitCFIStartProc(); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - TFL->getInitialFrameState(Moves); - Asm->EmitCFIFrameMoves(Moves); - Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); - } + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (!shouldEmitTable) + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // Provide LSDA information. - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - if (LSDAEncoding != dwarf::DW_EH_PE_omit) - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + if (!shouldEmitLSDA) + return; - // Indicate personality routine, if any. - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if (PerEncoding != dwarf::DW_EH_PE_omit && - MMI->getPersonalities()[MMI->getPersonalityIndex()]) - Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", - MMI->getPersonalityIndex()), - PerEncoding); + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfCFIException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (shouldEmitMoves) - Asm->OutStreamer.EmitCFIEndProc(); + Asm->OutStreamer.EmitCFIEndProc(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -133,6 +138,6 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); - if (shouldEmitTable) + if (shouldEmitPersonality) EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 000000000000..7ce0cfe8e79e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,983 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) + : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. +CompileUnit::~CompileUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. +/// +void CompileUnit::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add an signed integer attribute data and value. +/// +void CompileUnit::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value. +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value. +/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), + G.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location) { + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addRegisterOp - Add register operand. +void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. +void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void CompileUnit::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV->getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (;i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... <various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. +/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + if (CI->getBitWidth() <= 64) { + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. + for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + +} +/// addToContextOwner - Add Die into the list of its context owner's children. +void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void CompileUnit::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + DIEEntry *Entry = getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + insertDIEEntry(Ty, Entry); + + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. + StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. + for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (Tag == dwarf::DW_TAG_class_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + } +} + +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + DIE *ParamDIE = getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. +DIE * +CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { + DIE *ParamDIE = getDIE(TPV); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); + addType(ParamDIE, TPV.getType()); + if (!TPV.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + TPV.getValue()); + return ParamDIE; +} + +/// constructSubrangeDIE - Construct subrange DIE from DISubrange. +void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ + DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + int64_t L = SR.getLo(); + int64_t H = SR.getHi(); + + // The L value defines the lower bounds which is typically zero for C/C++. The + // H value is the upper bounds. Values are 64 bit. H - L + 1 is the size + // of the array. If L > H then do not emit DW_AT_lower_bound and + // DW_AT_upper_bound attributes. If L is zero and H is also zero then the + // array has one element and in such case do not emit lower bound. + + if (L > H) { + Buffer.addChild(DW_Subrange); + return; + } + if (L) + addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + Buffer.addChild(DW_Subrange); +} + +/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->getTag() == dwarf::DW_TAG_vector_type) + addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + + // Emit derived type. + addType(&Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Get an anonymous type for index type. + DIE *IdxTy = getIndexTyDie(); + if (!IdxTy) { + // Construct an anonymous type for index type. + IdxTy = new DIE(dwarf::DW_TAG_base_type); + addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + addDie(IdxTy); + setIndexTyDie(IdxTy); + } + + // Add subranges to array type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + } +} + +/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. +DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + StringRef Name = ETy.getName(); + addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + int64_t Value = ETy.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// createMemberDIE - Create new member DIE. +DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { + DIE *MemberDie = new DIE(DT.getTag()); + StringRef Name = DT.getName(); + if (!Name.empty()) + addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + addType(MemberDie, DT.getTypeDerivedFrom()); + + addSourceLine(MemberDie, DT); + + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = DT.getOriginalTypeSize(); + + if (Size != FieldSize) { + // Handle bitfield. + addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getTargetData().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); + + // Here WD_AT_data_member_location points to the anonymous + // field that includes this bit field. + addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); + + } else + // This is not a bitfield. + addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + + if (DT.getTag() == dwarf::DW_TAG_inheritance + && DT.isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. + // BaseAddr = ObAddr + *((*ObAddr) - Offset) + + DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, + VBaseLocationDie); + } else + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + + if (DT.isProtected()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + // Otherwise C++ member and base classes are considered public. + else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (DT.isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + dwarf::DW_VIRTUALITY_virtual); + + // Objective-C properties. + StringRef PropertyName = DT.getObjCPropertyName(); + if (!PropertyName.empty()) { + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, + PropertyName); + StringRef GetterName = DT.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, + dwarf::DW_FORM_string, GetterName); + StringRef SetterName = DT.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, + dwarf::DW_FORM_string, SetterName); + unsigned PropertyAttributes = 0; + if (DT.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (DT.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (DT.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (DT.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (DT.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (DT.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + } + return MemberDie; +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h new file mode 100644 index 000000000000..f4f6fb8b0df4 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -0,0 +1,282 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DIE.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/OwningPtr.h" + +namespace llvm { + +class DwarfDebug; +class MachineLocation; +class MachineOperand; +class ConstantInt; +class DbgVariable; + +//===----------------------------------------------------------------------===// +/// CompileUnit - This dwarf writer support class manages information associate +/// with a source file. +class CompileUnit { + /// ID - File identifier for source. + /// + unsigned ID; + + /// Die - Compile unit debug information entry. + /// + const OwningPtr<DIE> CUDie; + + /// Asm - Target of Dwarf emission. + AsmPrinter *Asm; + + DwarfDebug *DD; + + /// IndexTyDie - An anonymous type for index type. Owned by CUDie. + DIE *IndexTyDie; + + /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton + /// variables to debug information entries. + DenseMap<const MDNode *, DIE *> MDNodeToDieMap; + + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// descriptors to debug information entries using a DIEEntry proxy. + DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; + + /// Globals - A map of globally visible named entities for this unit. + /// + StringMap<DIE*> Globals; + + /// GlobalTypes - A map of globally visible types for this unit. + /// + StringMap<DIE*> GlobalTypes; + + /// DIEBlocks - A list of all the DIEBlocks in use. + std::vector<DIEBlock *> DIEBlocks; + +public: + CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); + ~CompileUnit(); + + // Accessors. + unsigned getID() const { return ID; } + DIE* getCUDie() const { return CUDie.get(); } + const StringMap<DIE*> &getGlobals() const { return Globals; } + const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } + + /// hasContent - Return true if this compile unit has something to write out. + /// + bool hasContent() const { return !CUDie->getChildren().empty(); } + + /// addGlobal - Add a new global entity to the compile unit. + /// + void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } + + /// addGlobalType - Add a new global type to the compile unit. + /// + void addGlobalType(StringRef Name, DIE *Die) { + GlobalTypes[Name] = Die; + } + + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. + DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + + DIEBlock *getDIEBlock() { + return new (DIEValueAllocator) DIEBlock(); + } + + /// insertDIE - Insert DIE into the map. + void insertDIE(const MDNode *N, DIE *D) { + MDNodeToDieMap.insert(std::make_pair(N, D)); + } + + /// getDIEEntry - Returns the debug information entry for the speciefied + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) { + DenseMap<const MDNode *, DIEEntry *>::iterator I = + MDNodeToDIEEntryMap.find(N); + if (I == MDNodeToDIEEntryMap.end()) + return NULL; + return I->second; + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + /// addDie - Adds or interns the DIE to the compile unit. + /// + void addDie(DIE *Buffer) { + this->CUDie->addChild(Buffer); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { + return IndexTyDie; + } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { + IndexTyDie = D; + } +public: + + /// addUInt - Add an unsigned integer attribute data and value. + /// + void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + + /// addSInt - Add an signed integer attribute data and value. + /// + void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + + /// addString - Add a string attribute data and value. + /// + void addString(DIE *Die, unsigned Attribute, unsigned Form, + const StringRef Str); + + /// addLabel - Add a Dwarf label attribute data and value. + /// + void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label); + + /// addDelta - Add a label delta attribute data and value. + /// + void addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. + /// + void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + + /// addBlock - Add block data. + /// + void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + + /// addSourceLine - Add location information to specified debug information + /// entry. + void addSourceLine(DIE *Die, DIVariable V); + void addSourceLine(DIE *Die, DIGlobalVariable G); + void addSourceLine(DIE *Die, DISubprogram SP); + void addSourceLine(DIE *Die, DIType Ty); + void addSourceLine(DIE *Die, DINameSpace NS); + + /// addAddress - Add an address attribute to a die based on the location + /// provided. + void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. + void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + /// + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + + /// addType - Add a new type attribute to the specified entity. + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); + + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, + DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(DIE &Buffer, + DIDerivedType DTy); + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, + DICompositeType CTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. + void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(DIEnumerator ETy); + + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(DIDerivedType DT); + +private: + + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 780fa405ef51..26da8006b30e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -52,7 +53,7 @@ static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::desc("Disable debug info printing")); static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absense of debug location information explicit."), + cl::desc("Make an absence of debug location information explicit."), cl::init(false)); #ifndef NDEBUG @@ -72,189 +73,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate -/// with a source file. -class CompileUnit { - /// ID - File identifier for source. - /// - unsigned ID; - - /// Die - Compile unit debug information entry. - /// - const OwningPtr<DIE> CUDie; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton - /// variables to debug information entries. - DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap<DIE*> Globals; - - /// GlobalTypes - A map of globally visible types for this unit. - /// - StringMap<DIE*> GlobalTypes; - -public: - CompileUnit(unsigned I, DIE *D) - : ID(I), CUDie(D), IndexTyDie(0) {} - - // Accessors. - unsigned getID() const { return ID; } - DIE* getCUDie() const { return CUDie.get(); } - const StringMap<DIE*> &getGlobals() const { return Globals; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - - /// addGlobalType - Add a new global type to the compile unit. - /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; - } - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } - - /// getDIEEntry - Returns the debug information entry for the speciefied - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } - -}; - -//===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// -class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. -public: - // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} - - // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } - bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); - } - bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); - } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } - DIType getType() const { - DIType Ty = Var.getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Var.isBlockByrefVariable()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. - - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); - } - return Ty; +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. */ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); } return Ty; } -}; + return Ty; +} //===----------------------------------------------------------------------===// /// DbgRange - This is used to track range of instructions with identical @@ -396,15 +264,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); } } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -439,852 +304,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. -DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - -/// addUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addSInt - Add an signed integer attribute data and value. -/// -void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); -} - -/// addLabel - Add a Dwarf label attribute data and value. -/// -void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); -} - -/// addDelta - Add a label delta attribute data and value. -/// -void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); -} - -/// addDIEEntry - Add a DIE attribute data and value. -/// -void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, - DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); -} - - -/// addBlock - Add block data. -/// -void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, - DIEBlock *Block) { - Block->ComputeSize(Asm); - DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. - if (!V.Verify()) - return; - - unsigned Line = V.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.Verify()) - return; - - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.Verify()) - return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; - - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(SP.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.Verify()) - return; - - unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = GetOrCreateSourceID(FN); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based -/// on provided frame index. -void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { - MachineLocation Location; - unsigned FrameReg; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (DV->variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. -/// -void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - - // Decode the original location, and use that as the start of the byref - // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); - - if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. - However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... <various fields> - struct __Block_byref_x_VarName *forwarding; - ... <various other fields> - SomeType VarName; - ... <maybe more fields> - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. - Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst <forward_fld_offset> - DW_OP_deref - DW_OP_plus_uconst <varName_fld_offset> - - That is what this function does. */ - -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. -/// -void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); - bool isPointer = false; - - StringRef varName = DV->getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); - isPointer = true; - } - - DICompositeType blockStruct = DICompositeType(TmpTy); - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); - - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - StringRef fieldName = DT.getName(); - if (fieldName == "__forwarding") - forwardingField = Element; - else if (fieldName == varName) - varField = Element; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; - - // Decode the original location, and use that as the start of the byref - // variable's location. - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (RI->getFrameRegister(*Asm->MF) == Location.getReg() - && Location.getOffset()) { - // If variable offset is based in frame register then use fbreg. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - addBlock(Die, Attribute, 0, Block); - return; - } - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - addBlock(Die, Attribute, 0, Block); -} - -/// addRegisterAddress - Add register location entry in variable DIE. -bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { - assert (MO.isReg() && "Invalid machine operand!"); - if (!MO.getReg()) - return false; - MachineLocation Location; - Location.set(MO.getReg()); - addAddress(Die, dwarf::DW_AT_location, Location); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = MO.getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, - bool Unsigned) { - if (CI->getBitWidth() <= 64) { - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - return true; - } - - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); - - int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) - ContextDIE->addChild(Die); - else - getCompileUnit(Context)->addDie(Die); -} - -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. -DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - CompileUnit *TypeCU = getCompileUnit(Ty); - DIE *TyDIE = TypeCU->getDIE(Ty); - if (TyDIE) - return TyDIE; - - // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - TypeCU->insertDIE(Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); - } - - addToContextOwner(TyDIE, Ty.getContext()); - return TyDIE; -} - -/// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.Verify()) - return; - - // Check for pre-existence. - CompileUnit *TypeCU = getCompileUnit(Ty); - DIEEntry *Entry = TypeCU->getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(Buffer); - TypeCU->insertDIEEntry(Ty, Entry); - - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); -} - -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { - // Get core information. - StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); -} - -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { - // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); - - // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - - // Add source line info if available and TyDesc is not a forward declaration. - if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); -} - -/// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. - StringRef Name = CTy.getName(); - - uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); - - switch (Tag) { - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); - break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - } - break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. - DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); - - bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); - isPrototyped = false; - } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); - } - } - // Add prototype flag. - if (isPrototyped) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - } - break; - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { - // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. - for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = createSubprogramDIE(DISubprogram(Element)); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); - addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else - continue; - Buffer.addChild(ElemDie); - } - - if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } - - if (Tag == dwarf::DW_TAG_class_type) { - DIArray TParams = CTy.getTemplateParams(); - unsigned N = TParams.getNumElements(); - // Add template parameters. - for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); - else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); - } - } - break; - } - default: - break; - } - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { - // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } - - // Add source line info if available. - if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); - } -} - -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - CompileUnit *TypeCU = getCompileUnit(TP); - DIE *ParamDIE = TypeCU->getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { - CompileUnit *TVCU = getCompileUnit(TPV); - DIE *ParamDIE = TVCU->getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; -} - -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - - Buffer.addChild(DW_Subrange); -} - -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); - - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); - - // Get an anonymous type for index type. - CompileUnit *TheCU = getCompileUnit(*CTy); - DIE *IdxTy = TheCU->getIndexTyDie(); - if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - TheCU->addDie(IdxTy); - TheCU->setIndexTyDie(IdxTy); - } - - // Add subranges to array type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); - } -} - -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; -} - /// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. @@ -1295,84 +314,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createMemberDIE - Create new member DIE. -DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); - StringRef Name = DT.getName(); - if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - addType(MemberDie, DT.getTypeDerivedFrom()); - - addSourceLine(MemberDie, DT); - - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { - - // For C++, virtual base classes are not at fixed offset. Use following - // expression to extract appropriate offset from vtable. - // BaseAddr = ObAddr + *((*ObAddr) - Offset) - - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); - - if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, - dwarf::DW_VIRTUALITY_virtual); - return MemberDie; -} - /// createSubprogramDIE - Create new DIE using SP. DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); @@ -1381,19 +322,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + SPCU->addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. + SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); + + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); - addSourceLine(SPDie, SP); + SPCU->addSourceLine(SPDie, SP); if (SP.isPrototyped()) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. DICompositeType SPTy = SP.getType(); @@ -1401,24 +358,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { unsigned SPTag = SPTy.getTag(); if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); + SPCU->addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0))); + SPCU->addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = SPCU->getDIEBlock(); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. DICompositeType SPTy = SP.getType(); @@ -1429,32 +386,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); + SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); if (unsigned isa = Asm->getISAEncoding()) { - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); - return SPDie; } @@ -1509,51 +460,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); - if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Refer function declaration directly. + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + createSubprogramDIE(SPDecl)); + else { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } } - // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); } - addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); return SPDie; } @@ -1570,13 +527,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { if (Ranges.empty()) return 0; + CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1595,8 +553,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1636,11 +594,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { CompileUnit *TheCU = getCompileUnit(InlinedSP); DIE *OriginDIE = TheCU->getDIE(InlinedSP); assert(OriginDIE && "Unable to find Origin DIE!"); - addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1656,8 +614,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); - addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } @@ -1686,7 +644,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - + CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); DIE *AbsDIE = NULL; DenseMap<const DbgVariable *, const DbgVariable *>::iterator V2AVI = VarToAbstractVarMap.find(DV); @@ -1694,20 +652,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { AbsDIE = V2AVI->second->getDIE(); if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, DV->getVariable()); + VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + Name); + VariableCU->addSourceLine(VariableDie, DV->getVariable()); // Add variable type. - addType(VariableDie, DV->getType()); + VariableCU->addType(VariableDie, DV->getType()); } if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); else if (DIVariable(DV->getVariable()).isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1718,7 +679,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); UseDotDebugLocEntry.insert(VariableDie); @@ -1738,22 +699,30 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (DVInsn->getOperand(1).isImm() && TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); - updated = true; - } else - updated = addRegisterAddress(VariableDie, RegOp); + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + VariableCU->addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); + updated = true; } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); + updated = VariableCU->addConstantValue(VariableDie, + DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); - updated = true; - } + VariableCU->addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; } if (!updated) { // If variableDie is not updated then DBG_VALUE instruction does not @@ -1767,15 +736,21 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // .. else use frame index, if available. int FI = 0; - if (findVariableFrameIndex(DV, &FI)) - addVariableAddress(DV, VariableDie, FI); - + if (findVariableFrameIndex(DV, &FI)) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + } + DV->setDIE(VariableDie); return VariableDie; } -void DwarfDebug::addPubTypes(DISubprogram SP) { +void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); unsigned SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) @@ -1789,9 +764,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { DICompositeType CATy = getDICompositeType(ATy); if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() && !CATy.isForwardDecl()) { - CompileUnit *TheCU = getCompileUnit(CATy); - if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) - TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); + if (DIEEntry *Entry = getDIEEntry(CATy)) + addGlobalType(CATy.getName(), Entry->getEntry()); } } } @@ -1802,6 +776,14 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { return NULL; SmallVector <DIE *, 8> Children; + + // Collect arguments for current function. + if (Scope == CurrentFnDbgScope) + for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) + if (DbgVariable *ArgDV = CurrentFnArguments[i]) + if (DIE *Arg = constructVariableDIE(ArgDV, Scope)) + Children.push_back(Arg); + // Collect lexical scope childrens first. const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) @@ -1841,7 +823,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS)); + getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1851,10 +833,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ +unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) - return GetOrCreateSourceID("<stdin>"); + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !FileName.startswith("/")) { + std::string FullPathName(DirName.data()); + if (!DirName.endswith("/")) + FullPathName += "/"; + FullPathName += FileName.data(); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); if (Entry.getValue()) @@ -1864,19 +857,18 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ Entry.setValue(SrcId); // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); return SrcId; } /// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - CompileUnit *TheCU = getCompileUnit(NS); - DIE *NDie = TheCU->getDIE(NS); +DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + DIE *NDie = getDIE(NS); if (NDie) return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); - TheCU->insertDIE(NS, NDie); + insertDIE(NS, NDie); if (!NS.getName().empty()) addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); addSourceLine(NDie, NS); @@ -1890,40 +882,40 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN); + unsigned ID = GetOrCreateSourceID(FN, Dir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + DIUnit.getProducer()); + NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList()) - addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("section_line")); + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("section_line")); else - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); - + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) - addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - CompileUnit *NewCU = new CompileUnit(ID, Die); if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); @@ -2019,14 +1011,15 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { bool isGlobalVariable = GV.getGlobal() != NULL; // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // Add type. - addType(VariableDIE, GTy); + TheCU->addType(VariableDIE, GTy); if (GTy.isCompositeType() && !GTy.getName().empty() && !GTy.isForwardDecl()) { DIEEntry *Entry = TheCU->getDIEEntry(GTy); @@ -2035,22 +1028,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } // Add scoping info. if (!GV.isLocalToUnit()) { - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // Expose as global. TheCU->addGlobal(GV.getName(), VariableDIE); } // Add line number info. - addSourceLine(VariableDIE, GV); + TheCU->addSourceLine(VariableDIE, GV); // Add to map. TheCU->insertDIE(N, VariableDIE); // Add to context owner. DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); + TheCU->addToContextOwner(VariableDIE, GVContext); // Add location. if (isGlobalVariable) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. @@ -2058,28 +1051,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); TheCU->addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } } else if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) - addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } return; @@ -2105,7 +1098,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { TheCU->insertDIE(N, SubprogramDie); // Add to context owner. - addToContextOwner(SubprogramDie, SP.getContext()); + TheCU->addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); @@ -2160,12 +1153,16 @@ void DwarfDebug::beginModule(Module *M) { //getOrCreateTypeDIE if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); @@ -2216,7 +1213,7 @@ void DwarfDebug::endModule() { for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), @@ -2226,7 +1223,8 @@ void DwarfDebug::endModule() { if (!N) continue; DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, + dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. @@ -2309,6 +1307,30 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, return AbsDbgVariable; } +/// addCurrentFnArgument - If Var is an current function argument that add +/// it in CurrentFnArguments list. +bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope) { + if (Scope != CurrentFnDbgScope) + return false; + DIVariable DV = Var->getVariable(); + if (DV.getTag() != dwarf::DW_TAG_arg_variable) + return false; + unsigned ArgNo = DV.getArgNumber(); + if (ArgNo == 0) + return false; + + size_t Size = CurrentFnArguments.size(); + if (Size == 0) + CurrentFnArguments.resize(MF->getFunction()->arg_size()); + // llvm::Function argument size is not good indicator of how many + // arguments does the function have at source level. + if (ArgNo > Size) + CurrentFnArguments.resize(ArgNo * 2); + CurrentFnArguments[ArgNo - 1] = Var; + return true; +} + /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. void @@ -2337,7 +1359,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); DbgVariable *RegVar = new DbgVariable(DV); recordVariableFrameIndex(RegVar, VP.first); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (AbsDbgVariable) { recordVariableFrameIndex(AbsDbgVariable, VP.first); VarToAbstractVarMap[RegVar] = AbsDbgVariable; @@ -2349,9 +1372,9 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - return true; - return false; + return MI->getNumOperands() == 3 && + MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && + MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } /// collectVariableInfo - Populate DbgScope entries with variables' info. @@ -2362,41 +1385,21 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); - SmallVector<const MachineInstr *, 8> DbgValues; - // Collect variable information from DBG_VALUE machine instructions; - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - if (!MInsn->isDebugValue()) - continue; - DbgValues.push_back(MInsn); - } - - // This is a collection of DBV_VALUE instructions describing same variable. - SmallVector<const MachineInstr *, 4> MultipleValues; - for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(), - E = DbgValues.end(); I != E; ++I) { - const MachineInstr *MInsn = *I; - MultipleValues.clear(); - if (isDbgValueInDefinedReg(MInsn)) - MultipleValues.push_back(MInsn); - DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata()); - if (Processed.count(DV) != 0) + for (SmallVectorImpl<const MDNode*>::const_iterator + UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; + ++UVI) { + const MDNode *Var = *UVI; + if (Processed.count(Var)) continue; - const MachineInstr *PrevMI = MInsn; - for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1, - ME = DbgValues.end(); MI != ME; ++MI) { - const MDNode *Var = - (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && - !PrevMI->isIdenticalTo(*MI)) - MultipleValues.push_back(*MI); - PrevMI = *MI; - } + // History contains relevant DBG_VALUE instructions for Var and instructions + // clobbering it. + SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + if (History.empty()) + continue; + const MachineInstr *MInsn = History.front(); + DIVariable DV(Var); DbgScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) @@ -2408,32 +1411,29 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; Processed.insert(DV); + assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *RegVar = new DbgVariable(DV); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; } - if (MultipleValues.size() <= 1) { + + // Simple ranges that are fully coalesced. + if (History.size() <= 1 || (History.size() == 2 && + MInsn->isIdenticalTo(History.back()))) { DbgVariableToDbgInstMap[RegVar] = MInsn; continue; } // handle multiple DBG_VALUE instructions describing one variable. - if (DotDebugLocEntries.empty()) - RegVar->setDotDebugLocOffset(0); - else - RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - const MachineInstr *Begin = NULL; - const MachineInstr *End = NULL; - for (SmallVector<const MachineInstr *, 4>::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); - MVI != MVE; ++MVI) { - if (!Begin) { - Begin = *MVI; - continue; - } - End = *MVI; + RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); + + for (SmallVectorImpl<const MachineInstr*>::const_iterator + HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + const MachineInstr *Begin = *HI; + assert(Begin->isDebugValue() && "Invalid History entry"); MachineLocation MLoc; if (Begin->getNumOperands() == 3) { if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) @@ -2441,25 +1441,32 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } else MLoc = Asm->getDebugValueLocation(Begin); + // FIXME: emitDebugLoc only understands registers. + if (!MLoc.getReg()) + continue; + + // Compute the range for a register location. const MCSymbol *FLabel = getLabelBeforeInsn(Begin); - const MCSymbol *SLabel = getLabelBeforeInsn(End); - if (MLoc.getReg()) - DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); - - Begin = End; - if (MVI + 1 == MVE) { - // If End is the last instruction then its value is valid - // until the end of the funtion. - MachineLocation EMLoc; - if (End->getNumOperands() == 3) { - if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm()) - EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); - } else - EMLoc = Asm->getDebugValueLocation(End); - if (EMLoc.getReg()) - DotDebugLocEntries. - push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc)); + const MCSymbol *SLabel = 0; + + if (HI + 1 == HE) + // If Begin is the last instruction in History then its value is valid + // until the end of the function. + SLabel = FunctionEndSym; + else { + const MachineInstr *End = HI[1]; + if (End->isDebugValue()) + SLabel = getLabelBeforeInsn(End); + else { + // End is a normal instruction clobbering the range. + SLabel = getLabelAfterInsn(End); + assert(SLabel && "Forgot label after clobber instruction"); + ++HI; + } } + + // The value is valid until the next DBG_VALUE or clobber. + DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -2480,66 +1487,74 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// getLabelBeforeInsn - Return Label preceding the instruction. const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsBeforeInsn.find(MI); - if (I == LabelsBeforeInsn.end()) - // FunctionBeginSym always preceeds all the instruction in current function. - return FunctionBeginSym; - return I->second; + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; } /// getLabelAfterInsn - Return Label immediately following the instruction. const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsAfterInsn.find(MI); - if (I == LabelsAfterInsn.end()) - return NULL; - return I->second; + return LabelsAfterInsn.lookup(MI); } /// beginInstruction - Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { - if (InsnNeedsLabel.count(MI) == 0) { - LabelsBeforeInsn[MI] = PrevLabel; - return; + // Check if source location changes, but ignore DBG_VALUE locations. + if (!MI->isDebugValue()) { + DebugLoc DL = MI->getDebugLoc(); + if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { + PrevInstLoc = DL; + if (!DL.isUnknown()) { + const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + recordSourceLine(DL.getLine(), DL.getCol(), Scope); + } else + recordSourceLine(0, 0, 0); + } } - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); - PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - LabelsBeforeInsn[MI] = PrevLabel; - return; - } + // Insert labels where requested. + DenseMap<const MachineInstr*, MCSymbol*>::iterator I = + LabelsBeforeInsn.find(MI); - // If location is unknown then use temp label for this DBG_VALUE - // instruction. - if (MI->isDebugValue()) { - PrevLabel = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(PrevLabel); - LabelsBeforeInsn[MI] = PrevLabel; + // No label needed. + if (I == LabelsBeforeInsn.end()) return; - } - if (UnknownLocations) { - PrevLabel = recordSourceLine(0, 0, 0); - LabelsBeforeInsn[MI] = PrevLabel; + // Label already assigned. + if (I->second) return; - } - assert (0 && "Instruction is not processed!"); + if (!PrevLabel) { + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); + } + I->second = PrevLabel; } /// endInstruction - Process end of an instruction. void DwarfDebug::endInstruction(const MachineInstr *MI) { - if (InsnsEndScopeSet.count(MI) != 0) { - // Emit a label if this instruction ends a scope. - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - LabelsAfterInsn[MI] = Label; + // Don't create a new label after DBG_VALUE instructions. + // They don't generate code. + if (!MI->isDebugValue()) + PrevLabel = 0; + + DenseMap<const MachineInstr*, MCSymbol*>::iterator I = + LabelsAfterInsn.find(MI); + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); } + I->second = PrevLabel; } /// getOrCreateDbgScope - Create DbgScope for the scope. @@ -2799,7 +1814,8 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsEndScopeSet.insert(RI->second); + requestLabelBeforeInsn(RI->first); + requestLabelAfterInsn(RI->second); } } } @@ -2877,45 +1893,145 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, TheScope); + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet<const MDNode *, 8> ProcessedArgs; - DebugLoc PrevLoc; + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + + /// LiveUserVar - Map physreg numbers to the MDNode they contain. + std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs()); + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) + I != E; ++I) { + bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MI = II; - DebugLoc DL = MI->getDebugLoc(); + if (MI->isDebugValue()) { assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); - DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); - if (!DV.Verify()) continue; - // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable) - InsnNeedsLabel.insert(MI); - // DBG_VALUE for inlined functions argument needs a label. - else if (!DISubprogram(getDISubprogram(DV.getContext())). - describes(MF->getFunction())) - InsnNeedsLabel.insert(MI); - // DBG_VALUE indicating argument location change needs a label. - else if (!ProcessedArgs.insert(DV)) - InsnNeedsLabel.insert(MI); + + // Keep track of user variables. + const MDNode *Var = + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + + // Variable is in a register, we need to check for clobbers. + if (isDbgValueInDefinedReg(MI)) + LiveUserVar[MI->getOperand(0).getReg()] = Var; + + // Check the history of this variable. + SmallVectorImpl<const MachineInstr*> &History = DbgValues[Var]; + if (History.empty()) { + UserVariables.push_back(Var); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. + DIVariable DV(Var); + if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(getDISubprogram(DV.getContext())) + .describes(MF->getFunction())) + LabelsBeforeInsn[MI] = FunctionBeginSym; + } else { + // We have seen this variable before. Try to coalesce DBG_VALUEs. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue()) { + // Coalesce identical entries at the end of History. + if (History.size() >= 2 && + Prev->isIdenticalTo(History[History.size() - 2])) + History.pop_back(); + + // Terminate old register assignments that don't reach MI; + MachineFunction::const_iterator PrevMBB = Prev->getParent(); + if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && + isDbgValueInDefinedReg(Prev)) { + // Previous register assignment needs to terminate at the end of + // its basic block. + MachineBasicBlock::const_iterator LastMI = + PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } + } + } + } + History.push_back(MI); } else { - // If location is unknown then instruction needs a location only if - // UnknownLocations flag is set. - if (DL.isUnknown()) { - if (UnknownLocations && !PrevLoc.isUnknown()) - InsnNeedsLabel.insert(MI); - } else if (DL != PrevLoc) - // Otherwise, instruction needs a location only if it is new location. - InsnNeedsLabel.insert(MI); + // Not a DBG_VALUE instruction. + if (!MI->isLabel()) + AtBlockEntry = false; + + // Check if the instruction clobbers any registers with debug vars. + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) + continue; + for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + unsigned Reg = *AI; ++AI) { + const MDNode *Var = LiveUserVar[Reg]; + if (!Var) + continue; + // Reg is now clobbered. + LiveUserVar[Reg] = 0; + + // Was MD last defined by a DBG_VALUE referring to Reg? + DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); + if (HistI == DbgValues.end()) + continue; + SmallVectorImpl<const MachineInstr*> &History = HistI->second; + if (History.empty()) + continue; + const MachineInstr *Prev = History.back(); + // Sanity-check: Register assignments are terminated at the end of + // their block. + if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent()) + continue; + // Is the variable still in Reg? + if (!isDbgValueInDefinedReg(Prev) || + Prev->getOperand(0).getReg() != Reg) + continue; + // Var is clobbered. Make sure the next instruction gets a label. + History.push_back(MI); + } + } } + } + } + + for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); + I != E; ++I) { + SmallVectorImpl<const MachineInstr*> &History = I->second; + if (History.empty()) + continue; - if (!DL.isUnknown() || UnknownLocations) - PrevLoc = DL; + // Make sure the final register assignments are terminated. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { + const MachineBasicBlock *PrevMBB = Prev->getParent(); + MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } } + // Request labels for the full history. + for (unsigned i = 0, e = History.size(); i != e; ++i) { + const MachineInstr *MI = History[i]; + if (MI->isDebugValue()) + requestLabelBeforeInsn(MI); + else + requestLabelAfterInsn(MI); + } + } + PrevInstLoc = DebugLoc(); PrevLabel = FunctionBeginSym; } @@ -2963,8 +2079,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, + dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), @@ -2973,12 +2090,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; - InsnNeedsLabel.clear(); + DeleteContainerPointers(CurrentFnArguments); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsEndScopeSet.clear(); + UserVariables.clear(); + DbgValues.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); @@ -3029,10 +2147,9 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. -MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, - const MDNode *S) { +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ StringRef Fn; - + StringRef Dir; unsigned Src = 1; if (S) { DIDescriptor Scope(S); @@ -3040,27 +2157,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, if (Scope.isCompileUnit()) { DICompileUnit CU(S); Fn = CU.getFilename(); + Dir = CU.getDirectory(); } else if (Scope.isFile()) { DIFile F(S); Fn = F.getFilename(); + Dir = F.getDirectory(); } else if (Scope.isSubprogram()) { DISubprogram SP(S); Fn = SP.getFilename(); + Dir = SP.getDirectory(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); Fn = DB.getFilename(); + Dir = DB.getDirectory(); } else assert(0 && "Unexpected scope info"); - Src = GetOrCreateSourceID(Fn); + Src = GetOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, - 0, 0); - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - return Label; + 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -3118,17 +2234,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - unsigned PrevOffset = 0; for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { // Compute size of compile unit header. - static unsigned Offset = PrevOffset + + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(I->second->getCUDie(), Offset, true); - PrevOffset = Offset; } } @@ -3289,8 +2403,7 @@ void DwarfDebug::emitDebugInfo() { unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. + sizeof(int8_t); // Pointer Size (in bytes) Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); @@ -3303,12 +2416,6 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitInt8(Asm->getTargetData().getPointerSize()); emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); } } @@ -3614,32 +2721,38 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (int Offset = Entry.Loc.getOffset()) { - // If the value is at a certain offset from frame register then - // use DW_OP_fbreg. - unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1; + DIVariable DV(Entry.Variable); + if (DV.hasComplexAddress()) { + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + OffsetSize); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); - Asm->EmitInt8(dwarf::DW_OP_fbreg); - Asm->OutStreamer.AddComment("Offset"); - Asm->EmitSLEB128(Offset); - } else { - if (Reg < 32) { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Loc) + N - 2); + Asm->EmitDwarfRegOp(Loc); +// Asm->EmitULEB128(DV.getAddrElement(1)); + i = 2; } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc) + N); + Asm->EmitDwarfRegOp(Entry.Loc); + } + + // Emit remaining complex address elements. + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); } + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc)); + Asm->EmitDwarfRegOp(Entry.Loc); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7df0510fbfba..25f2675d40f0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -40,21 +41,6 @@ class DIE; class DIEBlock; class DIEEntry; -class DIEnumerator; -class DIDescriptor; -class DIVariable; -class DIGlobal; -class DIGlobalVariable; -class DISubprogram; -class DIBasicType; -class DIDerivedType; -class DIType; -class DINameSpace; -class DISubrange; -class DICompositeType; -class DITemplateTypeParameter; -class DITemplateValueParameter; - //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. /// @@ -80,10 +66,12 @@ typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; MachineLocation Loc; + const MDNode *Variable; bool Merged; - DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) - : Begin(B), End(E), Loc(L), Merged(false) {} + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {} /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } @@ -96,6 +84,43 @@ typedef struct DotDebugLocEntry { } } DotDebugLocEntry; +//===----------------------------------------------------------------------===// +/// DbgVariable - This class is used to track local variable information. +/// +class DbgVariable { + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. +public: + // AbsVar may be NULL. + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + + // Accessors. + DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -122,12 +147,6 @@ class DwarfDebug { /// id mapped to a unique id. StringMap<unsigned> SourceIdMap; - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector<DIEBlock *> DIEBlocks; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - /// StringPool - A String->Symbol mapping of strings used by indirect /// references. StringMap<std::pair<MCSymbol*, unsigned> > StringPool; @@ -139,10 +158,13 @@ class DwarfDebug { /// UniqueVector<const MCSection*> SectionMap; - // CurrentFnDbgScope - Top level scope for the current function. - // + /// CurrentFnDbgScope - Top level scope for the current function. + /// DbgScope *CurrentFnDbgScope; + /// CurrentFnArguments - List of Arguments (DbgValues) for current function. + SmallVector<DbgVariable *, 8> CurrentFnArguments; + /// DbgScopeMap - Tracks the scopes in the current function. Owns the /// contained DbgScope*s. /// @@ -195,10 +217,6 @@ class DwarfDebug { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - typedef SmallVector<DbgScope *, 2> ScopeVector; - - SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels; @@ -217,9 +235,16 @@ class DwarfDebug { /// instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - /// insnNeedsLabel - Collection of instructions that need a label to mark - /// a debuggging information entity. - SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel; + /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction + /// in order of appearance. + SmallVector<const MDNode*, 8> UserVariables; + + /// DbgValues - For each user variable, keep a list of DBG_VALUE + /// instructions in order. The list can also contain normal instructions that + /// clobber the previous DBG_VALUE. + typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> > + DbgValueHistoryMap; + DbgValueHistoryMap DbgValues; SmallVector<const MCSymbol *, 8> DebugRangeSymbols; @@ -238,6 +263,9 @@ class DwarfDebug { std::vector<FunctionDebugFrameInfo> DebugFrames; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -246,150 +274,12 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - DIEInteger *DIEIntegerOne; private: - /// getNumSourceIds - Return the number of unique source ids. - unsigned getNumSourceIds() const { - return SourceIdMap.size(); - } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// addUInt - Add an unsigned integer attribute data and value. - /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. - /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - - /// addString - Add a string attribute data and value. - /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label); - - /// addDelta - Add a label delta attribute data and value. - /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MachineOperand &MO); - - /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based - /// on provided frame index. - void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); - - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); - - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); - - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); - - void addPubTypes(DISubprogram SP); - - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); - - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); - - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); - - /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP); - /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -504,11 +394,6 @@ private: /// inlining instance. void emitDebugInlineInfo(); - /// GetOrCreateSourceID - Look up the source id with the given directory and - /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. - unsigned GetOrCreateSourceID(StringRef FullName); - /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. void constructCompileUnit(const MDNode *N); @@ -525,7 +410,7 @@ private: /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. - MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); + void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); /// recordVariableFrameIndex - Record a variable's index. void recordVariableFrameIndex(const DbgVariable *V, int Index); @@ -546,6 +431,11 @@ private: /// and collect DbgScopes. Return true, if atleast one scope was found. bool extractScopeInformation(); + /// addCurrentFnArgument - If Var is an current function argument that add + /// it in CurrentFnArguments list. + bool addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope); + /// collectVariableInfo - Populate DbgScope entries with variables' info. void collectVariableInfo(const MachineFunction *, SmallPtrSet<const MDNode *, 16> &ProcessedVars); @@ -554,6 +444,23 @@ private: /// side table maintained by MMI. void collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet<const MDNode *, 16> &P); + + /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI. + void requestLabelBeforeInsn(const MachineInstr *MI) { + LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelBeforeInsn - Return Label preceding the instruction. + const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// requestLabelAfterInsn - Ensure that a label will be emitted after MI. + void requestLabelAfterInsn(const MachineInstr *MI) { + LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelAfterInsn - Return Label immediately following the instruction. + const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -577,17 +484,19 @@ public: /// void endFunction(const MachineFunction *MF); - /// getLabelBeforeInsn - Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - - /// getLabelAfterInsn - Return Label immediately following the instruction. - const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// beginInstruction - Process beginning of an instruction. void beginInstruction(const MachineInstr *MI); /// endInstruction - Prcess end of an instruction. void endInstruction(const MachineInstr *MI); + + /// GetOrCreateSourceID - Look up the source id with the given directory and + /// source file names. If none currently exists, create a new id and insert it + /// in the SourceIds map. + unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); + + /// createSubprogramDIE - Create new DIE using SP. + DIE *createSubprogramDIE(DISubprogram SP); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index a172e53f8ac7..f11164122cc4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -140,17 +140,18 @@ public: }; class DwarfCFIException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality + /// should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. @@ -237,6 +238,38 @@ public: virtual void EndFunction(); }; + +class ARMException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + virtual ~ARMException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp index 751901183cd0..b50d8bd3cecc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp @@ -92,7 +92,7 @@ void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) // personality function reference: unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; @@ -168,7 +168,7 @@ void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 78a87431feaa..77043406bc85 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1048,7 +1048,7 @@ ReoptimizeBlock: // AnalyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isLandingPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d7d0e1b3812b..2ca3859caf04 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen GCStrategy.cpp IfConversion.cpp InlineSpiller.cpp + InterferenceCache.cpp IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 76bb3d148b0b..e5894b8cca9d 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, } void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -103,6 +103,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // Don't recompute a target specific hint. bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + // Don't recompute spill weight for an unspillable register. + bool Spillable = li.isSpillable(); + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); MachineInstr *mi = I.skipInstruction();) { if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) @@ -110,34 +113,37 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (!visited.insert(mi)) continue; - // Get loop info for mi. - if (mi->getParent() != mbb) { - mbb = mi->getParent(); - loop = loops_.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; - isExiting = loop ? loop->isLoopExiting(mbb) : false; + float weight = 1.0f; + if (Spillable) { + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = Loops.getLoopFor(mbb); + loopDepth = loop ? loop->getLoopDepth() : 0; + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; } - // Calculate instr weight. - bool reads, writes; - tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); - - // Give extra weight to what looks like a loop induction variable update. - if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) - weight *= 3; - - totalWeight += weight; - // Get allocation hints from copies. if (noHint || !mi->isCopy()) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = hint_[hint] += weight; + float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && lis_.isAllocatable(hint)) + if (hweight > bestPhys && LIS.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) @@ -145,15 +151,19 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } } - hint_.clear(); + Hint.clear(); // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weifght of hinted registers. + // Weakly boost the spill weight of hinted registers. totalWeight *= 1.01F; } + // If the live interval was already unspillable, leave it that way. + if (!Spillable) + return; + // Mark li as unspillable if all live ranges are tiny. if (li.isZeroLength()) { li.markNotSpillable(); @@ -166,8 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // FIXME: this gets much more complicated once we support non-trivial // re-materialization. bool isLoad = false; - SmallVector<LiveInterval*, 4> spillIs; - if (lis_.isReMaterializable(li, spillIs, isLoad)) { + if (LIS.isReMaterializable(li, 0, isLoad)) { if (isLoad) totalWeight *= 0.9F; else @@ -178,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); - const TargetRegisterClass *orc = mri.getRegClass(reg); - SmallPtrSet<const TargetRegisterClass*,8> rcs; - - for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), - E = mri.reg_nodbg_end(); I != E; ++I) { - // The targets don't have accurate enough regclass descriptions that we can - // handle subregs. We need something similar to - // TRI::getMatchingSuperRegClass, but returning a super class instead of a - // sub class. - if (I.getOperand().getSubReg()) { - DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); - return; - } - if (const TargetRegisterClass *rc = - I->getDesc().getRegClass(I.getOperandNo(), tri)) - rcs.insert(rc); - } + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterClass *OldRC = MRI.getRegClass(reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - // If we found no regclass constraints, just leave reg as is. - // In theory, we could inflate to the largest superclass of reg's existing - // class, but that might not be legal for the current cpu setting. - // This could happen if reg is only used by COPY instructions, so we may need - // to improve on this. - if (rcs.empty()) { + // Stop early if there is no room to grow. + if (NewRC == OldRC) return; - } - // Compute the intersection of all classes in rcs. - // This ought to be independent of iteration order, but if the target register - // classes don't form a proper algebra, it is possible to get different - // results. The solution is to make sure the intersection of any two register - // classes is also a register class or the null set. - const TargetRegisterClass *rc = 0; - for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(), - E = rcs.end(); I != E; ++I) { - rc = rc ? getCommonSubClass(rc, *I) : *I; - assert(rc && "Incompatible regclass constraints found"); + // Accumulate constraints from all uses. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + // TRI doesn't have accurate enough information to model this yet. + if (I.getOperand().getSubReg()) + return; + const TargetRegisterClass *OpRC = + I->getDesc().getRegClass(I.getOperandNo(), TRI); + if (OpRC) + NewRC = getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) + return; } - - if (rc == orc) - return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) - << " to " << rc->getName() <<".\n"); - mri.setRegClass(reg, rc); + DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) + << " to " << NewRC->getName() <<".\n"); + MRI.setRegClass(reg, NewRC); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 2ad80b4d3a75..bfb6ba10234f 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -19,15 +19,18 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, SmallVector<CCValAssign, 16> &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Invalid) { // No stack is used. StackOffset = 0; + clearFirstByValReg(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } @@ -44,8 +47,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; + TM.getTargetLowering()->HandleByVal(const_cast<CCState*>(this), Size); unsigned Offset = AllocateStack(Size, Align); - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -155,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << EVT(VT).getEVTString(); + << EVT(VT).getEVTString() << "\n"; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 91a9536e7757..270c337ef67e 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the pass that optimize code placement and align loop -// headers to target specific alignment boundary. +// This file implements the pass that optimizes code placement and aligns loop +// headers to target-specific alignment boundaries. // //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "Code Placement Optimizater"; + return "Code Placement Optimizer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Determine a position to move orphaned loop blocks to. If TopMBB is not // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid loosing that fallthrough. Otherwise append + // to the top of the loop to avoid losing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. MachineFunction::iterator InsertPt = diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 0ebb5b0db70e..34b1a396bb72 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -93,7 +93,8 @@ namespace { /// with the eh.exception call. This recursively looks past instructions /// which don't change the EH pointer value, like casts or PHI nodes. bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls); + SmallPtrSet<IntrinsicInst*, 8> &SelCalls, + SmallPtrSet<PHINode*, 32> &SeenPHIs); public: static char ID; // Pass identification, replacement for typeid. @@ -199,8 +200,8 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) { /// change the EH pointer value, like casts or PHI nodes. bool DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet<IntrinsicInst*, 8> &SelCalls) { - SmallPtrSet<PHINode*, 32> SeenPHIs; + SmallPtrSet<IntrinsicInst*, 8> &SelCalls, + SmallPtrSet<PHINode*, 32> &SeenPHIs) { bool Changed = false; for (Value::use_iterator @@ -215,11 +216,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, if (Invoke->getCalledFunction() == URoR) URoRInvoke = true; } else if (CastInst *CI = dyn_cast<CastInst>(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); } else if (PHINode *PN = dyn_cast<PHINode>(II)) { if (SeenPHIs.insert(PN)) // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); } } @@ -294,7 +295,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); + SmallPtrSet<PHINode*, 32> SeenPHIs; + Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); if (URoRInvoke) { // This EH pointer is being used by an invoke of an URoR instruction and @@ -437,8 +439,9 @@ bool DwarfEHPrepare::NormalizeLandingPads() { if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", - NewBB); + PHINode *NewPN = PHINode::Create(PN->getType(), + PN->getNumIncomingValues(), + PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index e08feeb27539..5b634682cc87 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -173,7 +173,7 @@ namespace llvm { unsigned Offset; // sh_offset - Offset from the file start unsigned Size; // sh_size - The section size. unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxillary information. + unsigned Info; // sh_info - Auxiliary information. unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section e diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 0fd1e8e83bd7..fa2319bff704 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) // Create the object code emitter object for this target. ElfCE = new ELFCodeEmitter(*this); - // Inital number of sections + // Initial number of sections NumSections = 0; } @@ -660,19 +660,21 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the /// function pointers, ignoring the init priority. void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { - // Should be an array of '{ int, void ()* }' structs. The first value is the + // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. - if (!isa<ConstantArray>(List)) return; + if (List->isNullValue()) return; ConstantArray *InitList = cast<ConstantArray>(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (InitList->getOperand(i)->isNullValue()) + continue; + ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i)); + + if (CS->getOperand(1)->isNullValue()) + continue; + + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } } bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index aed8bc947991..646e01407a4f 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.compress(); if (ViewEdgeBundles) view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + return false; } @@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } - - diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b5ec303f5d93..ebc2fc91efa3 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Expand Psuedo-instructions produced by ISel. These are usually to allow +// Expand Pseudo-instructions produced by ISel. These are usually to allow // the expansion to contain control flow, such as a conditional move // implemented with a conditional branch and a phi, or an atomic operation // implemented with a loop. diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index db53b0473a9a..790200b8df5f 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -146,10 +145,6 @@ namespace { : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} }; - /// Roots - Basic blocks that do not have successors. These are the starting - /// points of Graph traversal. - std::vector<MachineBasicBlock*> Roots; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. std::vector<BBInfo> BBAnalysis; @@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - // Look for root nodes, i.e. blocks without successors. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - if (I->succ_empty()) - Roots.push_back(I); - std::vector<IfcvtToken*> Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + @@ -406,7 +396,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } Tokens.clear(); - Roots.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { @@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, /// candidates. void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens) { - std::set<MachineBasicBlock*> Visited; - for (unsigned i = 0, e = Roots.size(); i != e; ++i) { - for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited), - E = idf_ext_end(Roots[i], Visited); I != E; ++I) { - MachineBasicBlock *BB = *I; - AnalyzeBlock(BB, Tokens); - } + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); } // Sort to favor more complex ifcvt scheme. diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 38e6c8590269..b1a33a6afa42 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -19,41 +19,75 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt<bool> -VerifySpills("verify-spills", cl::desc("Verify after each spill/split")); - namespace { class InlineSpiller : public Spiller { - MachineFunctionPass &pass_; - MachineFunction &mf_; - LiveIntervals &lis_; - LiveStacks &lss_; - AliasAnalysis *aa_; - VirtRegMap &vrm_; - MachineFrameInfo &mfi_; - MachineRegisterInfo &mri_; - const TargetInstrInfo &tii_; - const TargetRegisterInfo &tri_; - const BitVector reserved_; + MachineFunctionPass &Pass; + MachineFunction &MF; + LiveIntervals &LIS; + LiveStacks &LSS; + AliasAnalysis *AA; + MachineDominatorTree &MDT; + MachineLoopInfo &Loops; + VirtRegMap &VRM; + MachineFrameInfo &MFI; + MachineRegisterInfo &MRI; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; // Variables that are valid during spill(), but used by multiple methods. - LiveRangeEdit *edit_; - const TargetRegisterClass *rc_; - int stackSlot_; + LiveRangeEdit *Edit; + LiveInterval *StackInt; + int StackSlot; + unsigned Original; + + // All registers to spill to StackSlot, including the main register. + SmallVector<unsigned, 8> RegsToSpill; + + // All COPY instructions to/from snippets. + // They are ignored since both operands refer to the same stack slot. + SmallPtrSet<MachineInstr*, 8> SnippetCopies; // Values that failed to remat at some point. - SmallPtrSet<VNInfo*, 8> usedValues_; + SmallPtrSet<VNInfo*, 8> UsedValues; + + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {} + }; + + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; + SibValueMap SibValues; + + // Dead defs generated during spilling. + SmallVector<MachineInstr*, 8> DeadDefs; ~InlineSpiller() {} @@ -61,34 +95,52 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : pass_(pass), - mf_(mf), - lis_(pass.getAnalysis<LiveIntervals>()), - lss_(pass.getAnalysis<LiveStacks>()), - aa_(&pass.getAnalysis<AliasAnalysis>()), - vrm_(vrm), - mfi_(*mf.getFrameInfo()), - mri_(mf.getRegInfo()), - tii_(*mf.getTarget().getInstrInfo()), - tri_(*mf.getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)) {} - - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs); + : Pass(pass), + MF(mf), + LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AliasAnalysis>()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), + VRM(vrm), + MFI(*mf.getFrameInfo()), + MRI(mf.getRegInfo()), + TII(*mf.getTarget().getInstrInfo()), + TRI(*mf.getTarget().getRegisterInfo()) {} void spill(LiveRangeEdit &); private: - bool reMaterializeFor(MachineBasicBlock::iterator MI); + bool isSnippet(const LiveInterval &SnipLI); + void collectRegsToSpill(); + + bool isRegToSpill(unsigned Reg) { + return std::find(RegsToSpill.begin(), + RegsToSpill.end(), Reg) != RegsToSpill.end(); + } + + bool isSibling(unsigned Reg); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); + + void markValueUsed(LiveInterval*, VNInfo*); + bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); void reMaterializeAll(); - bool coalesceStackAccess(MachineInstr *MI); + bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex, MachineBasicBlock::iterator MI); + + void spillAroundUses(unsigned Reg); + void spillAll(); }; } @@ -96,45 +148,489 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { - if (VerifySpills) - mf.verify(&pass, "When creating inline spiller"); return new InlineSpiller(pass, mf, vrm); } } +//===----------------------------------------------------------------------===// +// Snippets +//===----------------------------------------------------------------------===// + +// When spilling a virtual register, we also spill any snippets it is connected +// to. The snippets are small live ranges that only have a single real use, +// leftovers from live range splitting. Spilling them enables memory operand +// folding or tightens the live range around the single use. +// +// This minimizes register pressure and maximizes the store-to-load distance for +// spill slots which can be important in tight loops. + +/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, +/// otherwise return 0. +static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { + if (!MI->isCopy()) + return 0; + if (MI->getOperand(0).getSubReg() != 0) + return 0; + if (MI->getOperand(1).getSubReg() != 0) + return 0; + if (MI->getOperand(0).getReg() == Reg) + return MI->getOperand(1).getReg(); + if (MI->getOperand(1).getReg() == Reg) + return MI->getOperand(0).getReg(); + return 0; +} + +/// isSnippet - Identify if a live interval is a snippet that should be spilled. +/// It is assumed that SnipLI is a virtual register with the same original as +/// Edit->getReg(). +bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { + unsigned Reg = Edit->getReg(); + + // A snippet is a tiny live range with only a single instruction using it + // besides copies to/from Reg or spills/fills. We accept: + // + // %snip = COPY %Reg / FILL fi# + // %snip = USE %snip + // %Reg = COPY %snip / SPILL %snip, fi# + // + if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + return false; + + MachineInstr *UseMI = 0; + + // Check that all uses satisfy our criteria. + for (MachineRegisterInfo::reg_nodbg_iterator + RI = MRI.reg_nodbg_begin(SnipLI.reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Allow copies to/from Reg. + if (isFullCopyOf(MI, Reg)) + continue; + + // Allow stack slot loads. + int FI; + if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow stack slot stores. + if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow a single additional instruction. + if (UseMI && MI != UseMI) + return false; + UseMI = MI; + } + return true; +} + +/// collectRegsToSpill - Collect live range snippets that only have a single +/// real use. +void InlineSpiller::collectRegsToSpill() { + unsigned Reg = Edit->getReg(); + + // Main register always spills. + RegsToSpill.assign(1, Reg); + SnippetCopies.clear(); + + // Snippets all have the same original, so there can't be any for an original + // register. + if (Original == Reg) + return; + + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) { + unsigned SnipReg = isFullCopyOf(MI, Reg); + if (!isSibling(SnipReg)) + continue; + LiveInterval &SnipLI = LIS.getInterval(SnipReg); + if (!isSnippet(SnipLI)) + continue; + SnippetCopies.insert(MI); + if (!isRegToSpill(SnipReg)) + RegsToSpill.push_back(SnipReg); + + DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + +bool InlineSpiller::isSibling(unsigned Reg) { + return TargetRegisterInfo::isVirtualRegister(Reg) && + VRM.getOriginal(Reg) == Original; +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. +/// +/// Determine if the value is defined by all reloads, so spilling isn't +/// necessary - the value is already in the stack slot. +/// +/// Return a defining instruction that may be a candidate for rematerialization. +/// +MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, + VNInfo *OrigVNI) { + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << '\n'); + SmallPtrSet<VNInfo*, 8> Visited; + SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(UseReg, UseVNI)); + + // Best spill candidate seen so far. This must dominate UseVNI. + SibValueInfo SVI(UseReg, UseVNI); + MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); + unsigned SpillDepth = Loops.getLoopDepth(UseMBB); + bool SeenOrigPHI = false; // Original PHI met. + + do { + unsigned Reg; + VNInfo *VNI; + tie(Reg, VNI) = WorkList.pop_back_val(); + if (!Visited.insert(VNI)) + continue; + + // Is this value a better spill candidate? + if (!isRegToSpill(Reg)) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { + // This is a valid spill location dominating UseVNI. + // Prefer to spill at a smaller loop depth. + unsigned Depth = Loops.getLoopDepth(MBB); + if (Depth < SpillDepth) { + DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg) + << ':' << VNI->id << '@' << VNI->def << '\n'); + SVI.SpillReg = Reg; + SVI.SpillVNI = VNI; + SpillDepth = Depth; + } + } + } + + // Trace through PHI-defs created by live range splitting. + if (VNI->isPHIDef()) { + if (VNI->def == OrigVNI->def) { + DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\n'); + SeenOrigPHI = true; + continue; + } + // Get values live-out of predecessors. + LiveInterval &LI = LIS.getInterval(Reg); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(Reg, PVNI)); + } + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SrcVNI && "Copy from non-existing value"); + DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def << '\n'); + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + continue; + } + } + + // Track reachable reloads. + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':' + << VNI->id << "@" << VNI->def << '\n'); + SVI.AllDefsAreReloads = true; + continue; + } + + // We have an 'original' def. Don't record trivial cases. + if (VNI == UseVNI) { + DEBUG(dbgs() << "Not a sibling copy.\n"); + return MI; + } + + // Potential remat candidate. + DEBUG(dbgs() << " def " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\t' << *MI); + SVI.DefMI = MI; + } while (!WorkList.empty()); + + if (SeenOrigPHI || SVI.DefMI) + SVI.AllDefsAreReloads = false; + + DEBUG({ + if (SVI.AllDefsAreReloads) + dbgs() << "All defs are reloads.\n"; + else + dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n'; + }); + SibValues.insert(std::make_pair(UseVNI, SVI)); + return SVI.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. +/// +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? + if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = 0; + // Check possible sibling copies. + if (VNI->isPHIDef() || VNI->getCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (!DefMI && !VNI->isPHIDef()) + DefMI = LIS.getInstructionFromIndex(VNI->def); + if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. +bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { + SlotIndex Idx = LIS.getInstructionIndex(CopyMI); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); + assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + return false; + + // SpillReg may have been deleted by remat and DCE. + if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + // Conservatively extend the stack slot range to the range of the original + // value. We may be able to do better with stack slot coloring by being more + // careful here. + assert(StackInt && "No stack slot assigned yet."); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); + + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) + return true; + + // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // any later spills of the same value. + eliminateRedundantSpills(SibLI, SVI.SpillVNI); + + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock::iterator MII; + if (SVI.SpillVNI->isPHIDef()) + MII = MBB->SkipPHIsAndLabels(MBB->begin()); + else { + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; + ++MII; + } + // Insert spill without kill flag immediately after def. + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); + --MII; // Point to store instruction. + LIS.InsertMachineInstrInMaps(MII); + VRM.addSpillSlotUse(StackSlot, MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + return true; +} + +/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any +/// redundant spills of this value in SLI.reg and sibling copies. +void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { + assert(VNI && "Missing value"); + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(&SLI, VNI)); + assert(StackInt && "No stack slot assigned yet."); + + do { + LiveInterval *LI; + tie(LI, VNI) = WorkList.pop_back_val(); + unsigned Reg = LI->reg; + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + + // Regs to spill are taken care of. + if (isRegToSpill(Reg)) + continue; + + // Add all of VNI's live range to StackInt. + StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + + // Find all spills and copies of VNI. + for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = UI.skipInstruction();) { + if (!MI->isCopy() && !MI->getDesc().mayStore()) + continue; + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (LI->getVNInfoAt(Idx) != VNI) + continue; + + // Follow sibling copies down the dominator tree. + if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (isSibling(DstReg)) { + LiveInterval &DstLI = LIS.getInterval(DstReg); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && "Missing defined value"); + assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + WorkList.push_back(std::make_pair(&DstLI, DstVNI)); + } + continue; + } + + // Erase spills. + int FI; + if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + // eliminateDeadDefs won't normally remove stores, so switch opcode. + MI->setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(MI); + } + } + } while (!WorkList.empty()); +} + + +//===----------------------------------------------------------------------===// +// Rematerialization +//===----------------------------------------------------------------------===// + +/// markValueUsed - Remember that VNI failed to rematerialize, so its defining +/// instruction cannot be eliminated. See through snippet copies +void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(LI, VNI)); + do { + tie(LI, VNI) = WorkList.pop_back_val(); + if (!UsedValues.insert(VNI)) + continue; + + if (VNI->isPHIDef()) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(LI, PVNI)); + } + continue; + } + + // Follow snippet copies. + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + if (!SnippetCopies.count(MI)) + continue; + LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); + assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SnipVNI && "Snippet undefined before copy"); + WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); + } while (!WorkList.empty()); +} + /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. -bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); - VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); +bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, + MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx); - if (!OrigVNI) { + if (!ParentVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) MO.setIsUndef(); } DEBUG(dbgs() << UseIdx << '\t' << *MI); return true; } - LiveRangeEdit::Remat RM(OrigVNI); - if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) { - usedValues_.insert(OrigVNI); + if (SnippetCopies.count(MI)) + return false; + + // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. + LiveRangeEdit::Remat RM(ParentVNI); + SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); + if (SibI != SibValues.end()) + RM.OrigMI = SibI->second.DefMI; + if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; } - // If the instruction also writes edit_->getReg(), it had better not require - // the same register for uses and defs. + // If the instruction also writes VirtReg.reg, it had better not require the + // same register for uses and defs. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); if (Writes) { for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - usedValues_.insert(OrigVNI); + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); return false; } @@ -145,35 +641,31 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { // fold a load into the instruction. That avoids allocating a new register. if (RM.OrigMI->getDesc().canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { - edit_->markRematerialized(RM.ParentVNI); + Edit->markRematerialized(RM.ParentVNI); return true; } // Alocate a new register for the remat. - LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); NewLI.markNotSpillable(); - // Rematting for a copy: Set allocation hint to be the destination register. - if (MI->isCopy()) - mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg()); - // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - lis_, tii_, tri_); + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + LIS, TII, TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' - << *lis_.getInstructionFromIndex(DefIdx)); + << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); } } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); return true; @@ -182,75 +674,85 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { - // Do a quick scan of the interval values to find if any are remattable. - if (!edit_->anyRematerializable(lis_, tii_, aa_)) + // analyzeSiblingValues has already tested all relevant defining instructions. + if (!Edit->anyRematerializable(LIS, TII, AA)) return; - usedValues_.clear(); + UsedValues.clear(); - // Try to remat before all uses of edit_->getReg(). + // Try to remat before all uses of snippets. bool anyRemat = false; - for (MachineRegisterInfo::use_nodbg_iterator - RI = mri_.use_nodbg_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) - anyRemat |= reMaterializeFor(MI); - + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (MachineRegisterInfo::use_nodbg_iterator + RI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(LI, MI); + } if (!anyRemat) return; // Remove any values that were completely rematted. - bool anyRemoved = false; - for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), - E = edit_->getParent().vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || - usedValues_.count(VNI)) - continue; - MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); - DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); - lis_.RemoveMachineInstrFromMaps(DefMI); - vrm_.RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - VNI->def = SlotIndex(); - anyRemoved = true; + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + MI->addRegisterDead(Reg, &TRI); + if (!MI->allDefsAreDead()) + continue; + DEBUG(dbgs() << "All defs dead: " << *MI); + DeadDefs.push_back(MI); + } } - if (!anyRemoved) + // Eliminate dead code after remat. Note that some snippet copies may be + // deleted here. + if (DeadDefs.empty()) return; - - // Removing values may cause debug uses where parent is not live. - for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) { - if (!MI->isDebugValue()) + DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + + // Get rid of deleted and empty intervals. + for (unsigned i = RegsToSpill.size(); i != 0; --i) { + unsigned Reg = RegsToSpill[i-1]; + if (!LIS.hasInterval(Reg)) { + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); continue; - // Try to preserve the debug value if parent is live immediately after it. - MachineBasicBlock::iterator NextMI = MI; - ++NextMI; - if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - SlotIndex Idx = lis_.getInstructionIndex(NextMI); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) - continue; } - DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); - MI->eraseFromParent(); + LiveInterval &LI = LIS.getInterval(Reg); + if (!LI.empty()) + continue; + Edit->eraseVirtReg(Reg, LIS); + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } + DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } -/// If MI is a load or store of stackSlot_, it can be removed. -bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// If MI is a load or store of StackSlot, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { int FI = 0; - unsigned reg; - if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && - !(reg = tii_.isStoreToStackSlot(MI, FI))) + unsigned InstrReg; + if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) && + !(InstrReg = TII.isStoreToStackSlot(MI, FI))) return false; // We have a stack access. Is it the right register and slot? - if (reg != edit_->getReg() || FI != stackSlot_) + if (InstrReg != Reg || FI != StackSlot) return false; DEBUG(dbgs() << "Coalescing stack access: " << *MI); - lis_.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); return true; } @@ -283,13 +785,13 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, } MachineInstr *FoldMI = - LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) - : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) + : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; - lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); if (!LoadMI) - vrm_.addSpillSlotUse(stackSlot_, FoldMI); + VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); return true; @@ -297,84 +799,40 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, /// insertReload - Insert a reload of NewLI.reg before MI. void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - lis_.getVNInfoAllocator()); + LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); } /// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, - MachineBasicBlock::iterator MI) { +void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - - // Get the defined value. It could be an early clobber so keep the def index. - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); - Idx = VNI->def; - - tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); } -void InlineSpiller::spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) { - LiveRangeEdit edit(*li, newIntervals, spillIs); - spill(edit); - if (VerifySpills) - mf_.verify(&pass_, "After inline spill"); -} - -void InlineSpiller::spill(LiveRangeEdit &edit) { - edit_ = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); - DEBUG(dbgs() << "Inline spilling " - << mri_.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); - assert(edit.getParent().isSpillable() && - "Attempting to spill already spilled value."); - - reMaterializeAll(); +/// spillAroundUses - insert spill code around each use of Reg. +void InlineSpiller::spillAroundUses(unsigned Reg) { + LiveInterval &OldLI = LIS.getInterval(Reg); - // Remat may handle everything. - if (edit_->getParent().empty()) - return; - - rc_ = mri_.getRegClass(edit.getReg()); - - // Share a stack slot among all descendants of Orig. - unsigned Orig = vrm_.getOriginal(edit.getReg()); - stackSlot_ = vrm_.getStackSlot(Orig); - if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) - stackSlot_ = vrm_.assignVirt2StackSlot(Orig); - - if (Orig != edit.getReg()) - vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); - - // Update LiveStacks now that we are committed to spilling. - LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); - if (!stacklvr.hasAtLeastOneValue()) - stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); - stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); - - // Iterate over instructions using register. - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); + // Iterate over instructions using Reg. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); MachineInstr *MI = RI.skipInstruction();) { // Debug values are not allowed to affect codegen. @@ -383,7 +841,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { uint64_t Offset = MI->getOperand(1).getImm(); const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, Offset, MDPtr, DL)) { DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -395,14 +853,44 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { continue; } + // Ignore copies to/from snippets. We'll delete them. + if (SnippetCopies.count(MI)) + continue; + // Stack slot accesses may coalesce away. - if (coalesceStackAccess(MI)) + if (coalesceStackAccess(MI, Reg)) continue; // Analyze instruction. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + + // Check for a sibling copy. + unsigned SibReg = isFullCopyOf(MI, Reg); + if (SibReg && isSibling(SibReg)) { + if (Writes) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, MI)) { + // This COPY is now dead, the value is already in the stack slot. + MI->getOperand(0).setIsDead(); + DeadDefs.push_back(MI); + continue; + } + } else { + // This is a reload for a sib-reg copy. Drop spills downstream. + LiveInterval &SibLI = LIS.getInterval(SibReg); + eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); + // The COPY will fold to a reload below. + } + } // Attempt to fold memory ops. if (foldMemoryOperand(MI, Ops)) @@ -410,11 +898,11 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); NewLI.markNotSpillable(); if (Reads) - insertReload(NewLI, MI); + insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -429,11 +917,84 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { hasLiveDef = true; } } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. if (Writes && hasLiveDef) - insertSpill(NewLI, MI); + insertSpill(NewLI, OldLI, Idx, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } } + +/// spillAll - Spill all registers remaining after rematerialization. +void InlineSpiller::spillAll() { + // Update LiveStacks now that we are committed to spilling. + if (StackSlot == VirtRegMap::NO_STACK_SLOT) { + StackSlot = VRM.assignVirt2StackSlot(Original); + StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); + StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + } else + StackInt = &LSS.getInterval(StackSlot); + + if (Original != Edit->getReg()) + VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); + + assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + + // Spill around uses of all RegsToSpill. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + spillAroundUses(RegsToSpill[i]); + + // Hoisted spills may cause dead code. + if (!DeadDefs.empty()) { + DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + } + + // Finally delete the SnippetCopies. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + VRM.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + // Delete all spilled registers. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + Edit->eraseVirtReg(RegsToSpill[i], LIS); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + Edit = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + // Share a stack slot among all descendants of Original. + Original = VRM.getOriginal(edit.getReg()); + StackSlot = VRM.getStackSlot(Original); + StackInt = 0; + + DEBUG(dbgs() << "Inline spilling " + << MRI.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\nFrom original " + << LIS.getInterval(Original) << '\n'); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); + assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); + + collectRegsToSpill(); + analyzeSiblingValues(); + reMaterializeAll(); + + // Remat may handle everything. + if (!RegsToSpill.empty()) + spillAll(); + + Edit->calculateRegClassAndHint(MF, LIS, Loops); +} diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp new file mode 100644 index 000000000000..b1014a97fa03 --- /dev/null +++ b/lib/CodeGen/InterferenceCache.cpp @@ -0,0 +1,155 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "InterferenceCache.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +void InterferenceCache::init(MachineFunction *mf, + LiveIntervalUnion *liuarray, + SlotIndexes *indexes, + const TargetRegisterInfo *tri) { + MF = mf; + LIUArray = liuarray; + TRI = tri; + PhysRegEntries.assign(TRI->getNumRegs(), 0); + for (unsigned i = 0; i != CacheEntries; ++i) + Entries[i].clear(mf, indexes); +} + +InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { + unsigned E = PhysRegEntries[PhysReg]; + if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { + if (!Entries[E].valid(LIUArray, TRI)) + Entries[E].revalidate(); + return &Entries[E]; + } + // No valid entry exists, pick the next round-robin entry. + E = RoundRobin; + if (++RoundRobin == CacheEntries) + RoundRobin = 0; + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; +} + +/// revalidate - LIU contents have changed, update tags. +void InterferenceCache::Entry::revalidate() { + // Invalidate all block entries. + ++Tag; + // Invalidate all iterators. + PrevPos = SlotIndex(); + for (unsigned i = 0, e = Aliases.size(); i != e; ++i) + Aliases[i].second = Aliases[i].first->getTag(); +} + +void InterferenceCache::Entry::reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF) { + // LIU's changed, invalidate cache. + ++Tag; + PhysReg = physReg; + Blocks.resize(MF->getNumBlockIDs()); + Aliases.clear(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + LiveIntervalUnion *LIU = LIUArray + *AS; + Aliases.push_back(std::make_pair(LIU, LIU->getTag())); + } + + // Reset iterators. + PrevPos = SlotIndex(); + unsigned e = Aliases.size(); + Iters.resize(e); + for (unsigned i = 0; i != e; ++i) + Iters[i].setMap(Aliases[i].first->getMap()); +} + +bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + unsigned i = 0, e = Aliases.size(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + LiveIntervalUnion *LIU = LIUArray + *AS; + if (i == e || Aliases[i].first != LIU) + return false; + if (LIU->changedSince(Aliases[i].second)) + return false; + } + return i == e; +} + +void InterferenceCache::Entry::update(unsigned MBBNum) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + + // Use advanceTo only when possible. + if (PrevPos != Start) { + if (!PrevPos.isValid() || Start < PrevPos) + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].find(Start); + else + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].advanceTo(Start); + PrevPos = Start; + } + + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } + + // Check for last interference in block. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid() || I.start() >= Stop) + continue; + I.advanceTo(Stop); + bool Backup = !I.valid() || I.start() >= Stop; + if (Backup) + --I; + SlotIndex StopI = I.stop(); + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } +} diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h new file mode 100644 index 000000000000..6c36fa4021fb --- /dev/null +++ b/lib/CodeGen/InterferenceCache.h @@ -0,0 +1,163 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INTERFERENCECACHE +#define LLVM_CODEGEN_INTERFERENCECACHE + +#include "LiveIntervalUnion.h" + +namespace llvm { + +class InterferenceCache { + const TargetRegisterInfo *TRI; + LiveIntervalUnion *LIUArray; + SlotIndexes *Indexes; + MachineFunction *MF; + + /// BlockInterference - information about the interference in a single basic + /// block. + struct BlockInterference { + BlockInterference() : Tag(0) {} + unsigned Tag; + SlotIndex First; + SlotIndex Last; + }; + + /// Entry - A cache entry containing interference information for all aliases + /// of PhysReg in all basic blocks. + class Entry { + /// PhysReg - The register currently represented. + unsigned PhysReg; + + /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions + /// change. + unsigned Tag; + + /// MF - The current function. + MachineFunction *MF; + + /// Indexes - Mapping block numbers to SlotIndex ranges. + SlotIndexes *Indexes; + + /// PrevPos - The previous position the iterators were moved to. + SlotIndex PrevPos; + + /// AliasTags - A LiveIntervalUnion pointer and tag for each alias of + /// PhysReg. + SmallVector<std::pair<LiveIntervalUnion*, unsigned>, 8> Aliases; + + typedef LiveIntervalUnion::SegmentIter Iter; + + /// Iters - an iterator for each alias + SmallVector<Iter, 8> Iters; + + /// Blocks - Interference for each block in the function. + SmallVector<BlockInterference, 8> Blocks; + + /// update - Recompute Blocks[MBBNum] + void update(unsigned MBBNum); + + public: + Entry() : PhysReg(0), Tag(0), Indexes(0) {} + + void clear(MachineFunction *mf, SlotIndexes *indexes) { + PhysReg = 0; + MF = mf; + Indexes = indexes; + } + + unsigned getPhysReg() const { return PhysReg; } + + void revalidate(); + + /// valid - Return true if this is a valid entry for physReg. + bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); + + /// reset - Initialize entry to represent physReg's aliases. + void reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF); + + /// get - Return an up to date BlockInterference. + BlockInterference *get(unsigned MBBNum) { + if (Blocks[MBBNum].Tag != Tag) + update(MBBNum); + return &Blocks[MBBNum]; + } + }; + + // We don't keep a cache entry for every physical register, that would use too + // much memory. Instead, a fixed number of cache entries are used in a round- + // robin manner. + enum { CacheEntries = 32 }; + + // Point to an entry for each physreg. The entry pointed to may not be up to + // date, and it may have been reused for a different physreg. + SmallVector<unsigned char, 2> PhysRegEntries; + + // Next round-robin entry to be picked. + unsigned RoundRobin; + + // The actual cache entries. + Entry Entries[CacheEntries]; + + // get - Get a valid entry for PhysReg. + Entry *get(unsigned PhysReg); + +public: + InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + + /// init - Prepare cache for a new function. + void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + const TargetRegisterInfo *); + + /// Cursor - The primary query interface for the block interference cache. + class Cursor { + Entry *CacheEntry; + BlockInterference *Current; + public: + /// Cursor - Create a cursor for the interference allocated to PhysReg and + /// all its aliases. + Cursor(InterferenceCache &Cache, unsigned PhysReg) + : CacheEntry(Cache.get(PhysReg)), Current(0) {} + + /// moveTo - Move cursor to basic block MBBNum. + void moveToBlock(unsigned MBBNum) { + Current = CacheEntry->get(MBBNum); + } + + /// hasInterference - Return true if the current block has any interference. + bool hasInterference() { + return Current->First.isValid(); + } + + /// first - Return the starting index of the first interfering range in the + /// current block. + SlotIndex first() { + return Current->First; + } + + /// last - Return the ending index of the last interfering range in the + /// current block. + SlotIndex last() { + return Current->Last; + } + }; + + friend class Cursor; +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 80dfc763af69..e1dad2efa98f 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -98,12 +98,6 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -// Enable or disable an experimental optimization to split GEPs -// and run a special GVN pass which does not examine loads, in -// an effort to factor out redundancy implicit in complex GEPs. -static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, - cl::desc("Split GEPs and run no-load GVN")); - LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &Triple) : TargetMachine(T), TargetTriple(Triple) { @@ -132,6 +126,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return true; assert(Context != 0 && "Failed to get MCContext"); + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + const MCAsmInfo &MAI = *getMCAsmInfo(); OwningPtr<MCStreamer> AsmStreamer; @@ -139,7 +136,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -152,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), + hasMCUseCFI(), InstPrinter, MCE, TAB, ShowMCInst); @@ -230,11 +228,40 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_ostream &Out, CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) return true; + + if (hasMCSaveTempLabels()) + Ctx->setAllowTemporaryLabels(false); + + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx); + TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + if (MCE == 0 || TAB == 0) + return true; + + OwningPtr<MCStreamer> AsmStreamer; + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx, + *TAB, Out, MCE, + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + // Make sure the code model is set. setCodeModelForJIT(); @@ -272,12 +299,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (!DisableVerify) PM.add(createVerifierPass()); - // Optionally, tun split-GEPs and no-load GVN. - if (EnableSplitGEPGVN) { - PM.add(createGEPSplitterPass()); - PM.add(createGVNPass(/*NoLoads=*/true)); - } - // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -304,6 +325,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::DwarfTable: + case ExceptionHandling::ARM: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 853ec1ac7c13..8b214831d2cd 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -76,6 +77,7 @@ typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; /// held by the same virtual register. The equivalence class is the transitive /// closure of that relation. namespace { +class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. @@ -99,10 +101,6 @@ class UserValue { void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, const TargetInstrInfo &TII); - /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. - void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII); - public: /// UserValue - Create a new UserValue. UserValue(const MDNode *var, unsigned o, DebugLoc L, @@ -146,17 +144,31 @@ public: /// getLocationNo - Return the location number that matches Loc. unsigned getLocationNo(const MachineOperand &LocMO) { - if (LocMO.isReg() && LocMO.getReg() == 0) - return ~0u; - for (unsigned i = 0, e = locations.size(); i != e; ++i) - if (LocMO.isIdenticalTo(locations[i])) - return i; + if (LocMO.isReg()) { + if (LocMO.getReg() == 0) + return ~0u; + // For register locations we dont care about use/def and other flags. + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + locations[i].getReg() == LocMO.getReg() && + locations[i].getSubReg() == LocMO.getSubReg()) + return i; + } else + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (LocMO.isIdenticalTo(locations[i])) + return i; locations.push_back(LocMO); // We are storing a MachineOperand outside a MachineInstr. locations.back().clearParent(); + // Don't store def operands. + if (locations.back().isReg()) + locations.back().setIsUse(); return locations.size() - 1; } + /// mapVirtRegs - Ensure that all virtual register locations are mapped. + void mapVirtRegs(LDVImpl *LDV); + /// addDef - Add a definition point to this value. void addDef(SlotIndex Idx, const MachineOperand &LocMO) { // Add a singular (Idx,Idx) -> Loc mapping. @@ -168,19 +180,36 @@ public: /// extendDef - Extend the current definition as far as possible down the /// dominator tree. Stop when meeting an existing def or when leaving the live /// range of VNI. + /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. /// @param LI Restrict liveness to where LI has the value VNI. May be null. /// @param VNI When LI is not null, this is the value to restrict to. + /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT); + /// addDefsFromCopies - The value in LI/LocNo may be copies to other + /// registers. Determine if any of the copies are available at the kill + /// points, and add defs if possible. + /// @param LI Scan for copies of the value in LI->reg. + /// @param LocNo Location number of LI->reg. + /// @param Kills Points where the range of LocNo could be extended. + /// @param NewDefs Append (Idx, LocNo) of inserted defs here. + void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, + LiveIntervals &LIS); + /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. - void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT); + void computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, MachineDominatorTree &MDT); /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, @@ -230,9 +259,6 @@ class LDVImpl { /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); - /// mapVirtReg - Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// handleDebugValue - Add DBG_VALUE instruction to our maps. /// @param MI DBG_VALUE instruction /// @param Idx Last valid SLotIndex before instruction. @@ -261,7 +287,10 @@ public: userVarMap.clear(); } - /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx. + /// mapVirtReg - Map virtual register to an equivalence class. + void mapVirtReg(unsigned VirtReg, UserValue *EC); + + /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. @@ -322,6 +351,13 @@ void UserValue::coalesceLocation(unsigned LocNo) { } } +void UserValue::mapVirtRegs(LDVImpl *LDV) { + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + LDV->mapVirtReg(locations[i].getReg(), this); +} + UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; @@ -363,14 +399,6 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { unsigned Offset = MI->getOperand(1).getImm(); const MDNode *Var = MI->getOperand(2).getMetadata(); UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); - - // If the location is a virtual register, make sure it is mapped. - if (MI->getOperand(0).isReg()) { - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - mapVirtReg(Reg, UV); - } - UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -405,6 +433,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); @@ -419,8 +448,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, bool ToEnd = true; if (LI && VNI) { LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) + if (!Range || Range->valno != VNI) { + if (Kills) + Kills->push_back(Start); continue; + } if (Range->end < Stop) Stop = Range->end, ToEnd = false; } @@ -438,6 +470,9 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limited by the next def. if (I.valid() && I.start() < Stop) Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); if (Start >= Stop) continue; @@ -455,7 +490,82 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, } void -UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { +UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { + if (Kills.empty()) + return; + // Don't track copies from physregs, there are too many uses. + if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + return; + + // Collect all the (vreg, valno) pairs that are copies of LI. + SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(LI->reg), + UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + // Copies of the full value. + if (UI.getOperand().getSubReg() || !UI->isCopy()) + continue; + MachineInstr *MI = &*UI; + unsigned DstReg = MI->getOperand(0).getReg(); + + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved register, + // or it could be spilled. + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + continue; + + // Is LocNo extended to reach this copy? If not, another def may be blocking + // it, or we are looking at a wrong value of LI. + SlotIndex Idx = LIS.getInstructionIndex(MI); + LocMap::iterator I = locInts.find(Idx.getUseIndex()); + if (!I.valid() || I.value() != LocNo) + continue; + + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + } + + if (CopyValues.empty()) + return; + + DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + + // Try to add defs of the copied values for each kill point. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + SlotIndex Idx = Kills[i]; + for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { + LiveInterval *DstLI = CopyValues[j].first; + const VNInfo *DstVNI = CopyValues[j].second; + if (DstLI->getVNInfoAt(Idx) != DstVNI) + continue; + // Check that there isn't already a def at Idx + LocMap::iterator I = locInts.find(Idx); + if (I.valid() && I.start() <= Idx) + continue; + DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); + MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); + assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); + unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); + I.insert(Idx, Idx.getNextSlot(), LocNo); + NewDefs.push_back(std::make_pair(Idx, LocNo)); + break; + } + } +} + +void +UserValue::computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, + MachineDominatorTree &MDT) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -463,7 +573,8 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (I.value() != ~0u) Defs.push_back(std::make_pair(I.start(), I.value())); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + // Extend all defs, and possibly add new ones along the way. + for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; @@ -472,9 +583,11 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { LiveInterval *LI = &LIS.getInterval(Loc.getReg()); const VNInfo *VNI = LI->getVNInfoAt(Idx); - extendDef(Idx, LocNo, LI, VNI, LIS, MDT); + SmallVector<SlotIndex, 16> Kills; + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT); + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); } else - extendDef(Idx, LocNo, 0, 0, LIS, MDT); + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT); } // Finally, erase all the undefs. @@ -486,8 +599,10 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { } void LDVImpl::computeIntervals() { - for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->computeIntervals(*LIS, *MDT); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT); + userValues[i]->mapVirtRegs(this); + } } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { @@ -640,13 +755,6 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, .addOperand(Loc).addImm(offset).addMetadata(variable); } -void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0) - .addImm(offset).addMetadata(variable); -} - void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); @@ -678,12 +786,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; ++I; - if (Stop == MBBEnd) - continue; - // The current interval ends before MBB. - // Insert a kill if there is a gap. - if (!I.valid() || I.start() > Stop) - insertDebugKill(MBB, Stop, LIS, TII); } } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index c2dbd6ab75a1..cfade24b8d87 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -30,19 +30,22 @@ #include <algorithm> using namespace llvm; -// CompEnd - Compare LiveRange ends. -namespace { -struct CompEnd { - bool operator()(const LiveRange &A, const LiveRange &B) const { - return A.end < B.end; - } -}; -} - LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { - assert(Pos.isValid() && "Cannot search for an invalid index"); - return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0), - CompEnd()); + // This algorithm is basically std::upper_bound. + // Unfortunately, std::upper_bound cannot be used with mixed types until we + // adopt C++0x. Many libraries can do it, but not all. + if (empty() || Pos >= endIndex()) + return end(); + iterator I = begin(); + size_t Len = ranges.size(); + do { + size_t Mid = Len >> 1; + if (Pos < I[Mid].end) + Len = Mid; + else + I += Mid + 1, Len -= Mid + 1; + } while (Len); + return I; } /// killedInRange - Return true if the interval has kills in [Start,End). @@ -291,6 +294,22 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } +/// extendInBlock - If this interval is live before UseIdx in the basic +/// block that starts at StartIdx, extend it to be live at UseIdx and return +/// the value. If there is no live range before UseIdx, return NULL. +VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) { + if (empty()) + return 0; + iterator I = std::upper_bound(begin(), end(), UseIdx); + if (I == begin()) + return 0; + --I; + if (I->end <= StartIdx) + return 0; + if (I->end <= UseIdx) + extendIntervalEndTo(I, UseIdx.getNextSlot()); + return I->valno; +} /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. @@ -476,60 +495,19 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, void LiveInterval::MergeValueInAsValue( const LiveInterval &RHS, const VNInfo *RHSValNo, VNInfo *LHSValNo) { - SmallVector<VNInfo*, 4> ReplacedValNos; - iterator IP = begin(); + // TODO: Make this more efficient. + iterator InsertPos = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; - SlotIndex Start = I->start, End = I->end; - IP = std::upper_bound(IP, end(), Start); - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - if (IP[-1].valno != LHSValNo) { - ReplacedValNos.push_back(IP[-1].valno); - IP[-1].valno = LHSValNo; // Update val#. - } - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) continue; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - if (IP->valno != LHSValNo) { - ReplacedValNos.push_back(IP->valno); - IP->valno = LHSValNo; // Update val#. - } - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) continue; - } - // Map the valno in the other live range to the current live range. - IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); - } - - - SmallSet<VNInfo*, 4> Seen; - for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) { - VNInfo *V1 = ReplacedValNos[i]; - if (Seen.insert(V1)) { - bool isDead = true; - for (const_iterator I = begin(), E = end(); I != E; ++I) - if (I->valno == V1) { - isDead = false; - break; - } - if (isDead) { - // Now that V1 is dead, remove it. - markValNoForDeletion(V1); - } - } + LiveRange Tmp = *I; + Tmp.valno = LHSValNo; + InsertPos = addRangeFrom(Tmp, InsertPos); } } - /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all /// LiveRanges with the V1 value number with the V2 value number. This can @@ -700,8 +678,8 @@ void LiveRange::print(raw_ostream &os) const { unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. - eqClass_.clear(); - eqClass_.grow(LI->getNumValNums()); + EqClass.clear(); + EqClass.grow(LI->getNumValNums()); const VNInfo *used = 0, *unused = 0; @@ -712,48 +690,65 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Group all unused values into one class. if (VNI->isUnused()) { if (unused) - eqClass_.join(unused->id, VNI->id); + EqClass.join(unused->id, VNI->id); unused = VNI; continue; } used = VNI; if (VNI->isPHIDef()) { - const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); assert(MBB && "Phi-def has no defining MBB"); // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) if (const VNInfo *PVNI = - LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) - eqClass_.join(VNI->id, PVNI->id); + LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) - eqClass_.join(VNI->id, UVNI->id); + EqClass.join(VNI->id, UVNI->id); } } // Lump all the unused values in with the last used value. if (used && unused) - eqClass_.join(used->id, unused->id); + EqClass.join(used->id, unused->id); - eqClass_.compress(); - return eqClass_.getNumClasses(); + EqClass.compress(); + return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], + MachineRegisterInfo &MRI) { assert(LIV[0] && "LIV[0] must be set"); LiveInterval &LI = *LIV[0]; - // First move runs to new intervals. + // Rewrite instructions. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MO.isUse() && MO.isUndef()) + continue; + // DBG_VALUE instructions should have been eliminated earlier. + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + const VNInfo *VNI = LI.getVNInfoAt(Idx); + assert(VNI && "Interval not live at use."); + MO.setReg(LIV[getEqClass(VNI)]->reg); + } + + // Move runs to new intervals. LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && eqClass_[J->valno->id] == 0) + while (J != E && EqClass[J->valno->id] == 0) ++J; for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = eqClass_[I->valno->id]) { + if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); LIV[eq]->ranges.push_back(*I); @@ -764,11 +759,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { // Transfer VNInfos to their new owners and renumber them. unsigned j = 0, e = LI.getNumValNums(); - while (j != e && eqClass_[j] == 0) + while (j != e && EqClass[j] == 0) ++j; for (unsigned i = j; i != e; ++i) { VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = eqClass_[i]) { + if (unsigned eq = EqClass[i]) { VNI->id = LIV[eq]->getNumValNums(); LIV[eq]->valnos.push_back(VNI); } else { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index aef5b5f77e78..9257191f7fc0 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -572,19 +572,12 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else if (allocatableRegs_[MO.getReg()]) { + else { MachineInstr *CopyMI = NULL; if (MI->isCopyLike()) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); - // Def of a register also defines its sub-registers. - for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) - // If MI also modifies the sub-register explicitly, avoid processing it - // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->definesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(*AS), 0); } } @@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = MIIdx.getStoreIndex(); } else { DEBUG(dbgs() << " live through"); - end = baseIndex; + end = getMBBEndIdx(MBB); } } @@ -746,7 +739,8 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. -void LiveIntervals::shrinkToUses(LiveInterval *li) { +bool LiveIntervals::shrinkToUses(LiveInterval *li, + SmallVectorImpl<MachineInstr*> *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can't only shrink physical registers"); @@ -760,7 +754,15 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { continue; SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); VNInfo *VNI = li->getVNInfoAt(Idx); - assert(VNI && "Live interval not live into reading instruction"); + if (!VNI) { + // This shouldn't happen: readsVirtualRegister returns true, but there is + // no live value. It is likely caused by a target getting <undef> flags + // wrong. + DEBUG(dbgs() << Idx << '\t' << *UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); + continue; + } if (VNI->def == Idx) { // Special case: An early-clobber tied operand reads and writes the // register one slot early. @@ -778,49 +780,47 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; + // We may eliminate PHI values, so recompute PHIKill flags. + VNI->setHasPHIKill(false); NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + + // A use tied to an early-clobber def ends at the load slot and isn't caught + // above. Catch it here instead. This probably only ever happens for inline + // assembly. + if (VNI->def.isUse()) + if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) + WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); } + // Keep track of the PHIs that are in use. + SmallPtrSet<VNInfo*, 8> UsedPHIs; + // Extend intervals to reach all uses in WorkList. while (!WorkList.empty()) { SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - - // Extend the live range for VNI to be live at Idx. - LiveInterval::iterator I = NewLI.find(Idx); - - // Already got it? - if (I != NewLI.end() && I->start <= Idx) { - assert(I->valno == VNI && "Unexpected existing value number"); - continue; - } - - // Is there already a live range in the block containing Idx? const MachineBasicBlock *MBB = getMBBFromIndex(Idx); SlotIndex BlockStart = getMBBStartIdx(MBB); - DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx - << " in BB#" << MBB->getNumber() << '@' << BlockStart); - if (I != NewLI.begin() && (--I)->end > BlockStart) { - assert(I->valno == VNI && "Wrong reaching def"); - DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n"); - // Is this the first use of a PHIDef in its defining block? - if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) { - // The PHI is live, make sure the predecessors are live-out. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - VNInfo *PVNI = li->getVNInfoAt(Stop); - // A predecessor is not required to have a live-out value for a PHI. - if (PVNI) { - assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag"); - WorkList.push_back(std::make_pair(Stop, PVNI)); - } + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + (void)ExtVNI; + assert(ExtVNI == VNI && "Unexpected existing value number"); + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + VNInfo *PVNI = li->getVNInfoAt(Stop); + // A predecessor is not required to have a live-out value for a PHI. + if (PVNI) { + PVNI->setHasPHIKill(true); + WorkList.push_back(std::make_pair(Stop, PVNI)); } } - - // Extend the live range in the block to include Idx. - NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI)); continue; } @@ -838,6 +838,7 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { } // Handle dead values. + bool CanSeparate = false; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; @@ -847,21 +848,28 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { assert(LII != NewLI.end() && "Missing live range for PHI"); if (LII->end != VNI->def.getNextSlot()) continue; - if (!VNI->isPHIDef()) { + if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->setIsUnused(true); NewLI.removeRange(*LII); + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(li->reg, tri_); + if (dead && MI->allDefsAreDead()) { + DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + dead->push_back(MI); + } } } // Move the trimmed ranges back. li->ranges.swap(NewLI.ranges); - DEBUG(dbgs() << "Shrink: " << *li << '\n'); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; } @@ -955,7 +963,7 @@ bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, bool &isLoad) { if (DisableReMat) return false; @@ -982,9 +990,10 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, // If a register operand of the re-materialized instruction is going to // be spilled next, then it's not legal to re-materialize this instruction. - for (unsigned i = 0, e = SpillIs.size(); i != e; ++i) - if (ImpUse == SpillIs[i]->reg) - return false; + if (SpillIs) + for (unsigned i = 0, e = SpillIs->size(); i != e; ++i) + if (ImpUse == (*SpillIs)[i]->reg) + return false; } return true; } @@ -993,16 +1002,15 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI) { - SmallVector<LiveInterval*, 4> Dummy1; bool Dummy2; - return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2); + return isReMaterializable(li, ValNo, MI, 0, Dummy2); } /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, bool &isLoad) { isLoad = false; for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); @@ -1499,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // ... // def = ... // = use - // It's better to start a new interval to avoid artifically + // It's better to start a new interval to avoid artificially // extend the new interval. if (MI->readsWritesVirtualRegister(li.reg) == std::make_pair(false,true)) { @@ -1702,7 +1710,9 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // overflow a float. This expression behaves like 10^d for small d, but is // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of // headroom before overflow. - float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + // By the way, powf() might be unavailable here. For consistency, + // We may take pow(double,double). + float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); return (isDef + isUse) * lc; } @@ -1715,7 +1725,7 @@ static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> *SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { assert(li.isSpillable() && "attempt to spill already spilled interval!"); diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 205f28a0d65a..b67f96667bfd 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { LiveInterval::iterator RegEnd = VirtReg.end(); SegmentIter SegPos = Segments.find(RegPos->start); - for (;;) { + while (SegPos.valid()) { SegPos.insert(RegPos->start, RegPos->end, &VirtReg); if (++RegPos == RegEnd) return; SegPos.advanceTo(RegPos->start); } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); } // Remove a live virtual register's segments from this union. @@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() { return FirstInterference; CheckedFirstInterference = true; InterferenceResult &IR = FirstInterference; + IR.LiveUnionI.setMap(LiveUnion->getMap()); // Quickly skip interference check for empty sets. if (VirtReg->empty() || LiveUnion->empty()) { @@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() { // VirtReg starts first, perform double binary search. IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + IR.LiveUnionI.find(IR.VirtRegI->start); } else { // LiveUnion starts first, perform double binary search. - IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + IR.LiveUnionI.find(VirtReg->beginIndex()); if (IR.LiveUnionI.valid()) IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); else @@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { +collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { // Cache the most recent interfering vreg to bypass isSeenInterference. RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; + + // Stop collecting when the max weight is exceeded. + if (RecentInterferingVReg->weight >= MaxWeight) + return InterferingVRegs.size(); + continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 6f9c5f4455e9..c83578e99c6c 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -95,6 +95,9 @@ public: // Remove a live virtual register's segments from this union. void extract(LiveInterval &VirtReg); + // Remove all inserted virtual registers. + void clear() { Segments.clear(); ++Tag; } + // Print union, using TRI to translate register names void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; @@ -163,10 +166,10 @@ public: bool CheckedFirstInterference; bool SeenAllInterferences; bool SeenUnspillableVReg; - unsigned Tag; + unsigned Tag, UserTag; public: - Query(): LiveUnion(), VirtReg() {} + Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {} Query(LiveInterval *VReg, LiveIntervalUnion *LIU): LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), @@ -181,11 +184,13 @@ public: SeenAllInterferences = false; SeenUnspillableVReg = false; Tag = 0; + UserTag = 0; } - void init(LiveInterval *VReg, LiveIntervalUnion *LIU) { + void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) { assert(VReg && LIU && "Invalid arguments"); - if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) { + if (UserTag == UTag && VirtReg == VReg && + LiveUnion == LIU && !LIU->changedSince(Tag)) { // Retain cached results, e.g. firstInterference. return; } @@ -193,6 +198,7 @@ public: LiveUnion = LIU; VirtReg = VReg; Tag = LIU->getTag(); + UserTag = UTag; } LiveInterval &virtReg() const { @@ -223,7 +229,8 @@ public: // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, + float MaxWeight = HUGE_VALF); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3bbda1c2e609..f8a3dbb5fd7b 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -11,24 +11,41 @@ // is spilled or split. //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "regalloc" #include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri, - LiveIntervals &lis, - VirtRegMap &vrm) { - const TargetRegisterClass *RC = mri.getRegClass(getReg()); - unsigned VReg = mri.createVirtualRegister(RC); - vrm.grow(); - vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg())); - LiveInterval &li = lis.getOrCreateInterval(VReg); - newRegs_.push_back(&li); - return li; +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, + LiveIntervals &LIS, + VirtRegMap &VRM) { + MachineRegisterInfo &MRI = VRM.getRegInfo(); + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + VRM.grow(); + VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + LiveInterval &LI = LIS.getOrCreateInterval(VReg); + newRegs_.push_back(&LI); + return LI; +} + +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, + const MachineInstr *DefMI, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + assert(DefMI && "Missing instruction"); + scannedRemattable_ = true; + if (!tii.isTriviallyReMaterializable(DefMI, aa)) + return false; + remattable_.insert(VNI); + return true; } void LiveRangeEdit::scanRemattable(LiveIntervals &lis, @@ -42,8 +59,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis, MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - if (tii.isTriviallyReMaterializable(DefMI, aa)) - remattable_.insert(VNI); + checkRematerializable(VNI, DefMI, tii, aa); } scannedRemattable_ = true; } @@ -66,18 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, UseIdx = UseIdx.getUseIndex(); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg()) + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. if (MO.isUndef() || !lis.hasInterval(MO.getReg())) continue; - // We don't want to move any defs. - if (MO.isDef()) - return false; // We cannot depend on virtual registers in uselessRegs_. - for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui) - if (uselessRegs_[ui]->reg == MO.getReg()) - return false; + if (uselessRegs_) + for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) + if ((*uselessRegs_)[ui]->reg == MO.getReg()) + return false; LiveInterval &li = lis.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); @@ -99,16 +113,22 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, if (!remattable_.count(RM.ParentVNI)) return false; - // No defining instruction. - RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def); - assert(RM.OrigMI && "Defining instruction for remattable value disappeared"); + // No defining instruction provided. + SlotIndex DefIdx; + if (RM.OrigMI) + DefIdx = lis.getInstructionIndex(RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } // If only cheap remats were requested, bail out early. if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) return false; return true; @@ -120,10 +140,174 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const Remat &RM, LiveIntervals &lis, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri) { + const TargetRegisterInfo &tri, + bool Late) { assert(RM.OrigMI && "Invalid remat"); tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); + return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getDefIndex(); +} + +void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { + if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) + LIS.removeInterval(Reg); +} + +bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, + SmallVectorImpl<MachineInstr*> &Dead, + MachineRegisterInfo &MRI, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineInstr *DefMI = 0, *UseMI = 0; + + // Check that there is a single def and a single use. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MO.isDef()) { + if (DefMI && DefMI != MI) + return false; + if (!MI->getDesc().canFoldAsLoad()) + return false; + DefMI = MI; + } else if (!MO.isUndef()) { + if (UseMI && UseMI != MI) + return false; + // FIXME: Targets don't know how to fold subreg uses. + if (MO.getSubReg()) + return false; + UseMI = MI; + } + } + if (!DefMI || !UseMI) + return false; + + DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); + + SmallVector<unsigned, 8> Ops; + if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + return false; + + MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + if (!FoldMI) + return false; + DEBUG(dbgs() << " folded: " << *FoldMI); + LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + UseMI->eraseFromParent(); + DefMI->addRegisterDead(LI->reg, 0); + Dead.push_back(DefMI); + return true; +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + LiveIntervals &LIS, VirtRegMap &VRM, + const TargetInstrInfo &TII) { + SetVector<LiveInterval*, + SmallVector<LiveInterval*, 8>, + SmallPtrSet<LiveInterval*, 8> > ToShrink; + MachineRegisterInfo &MRI = VRM.getRegInfo(); + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) { + MachineInstr *MI = Dead.pop_back_val(); + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + continue; + } + + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + continue; + } + + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) { + ToShrink.remove(&LI); + eraseVirtReg(Reg, LIS); + } + } + } + } + + if (delegate_) + delegate_->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + if (ToShrink.empty()) + break; + + // Shrink just one live interval. Then delete new dead defs. + LiveInterval *LI = ToShrink.back(); + ToShrink.pop_back(); + if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + continue; + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI->reg); + if (!LIS.shrinkToUses(LI, &Dead)) + continue; + + // LI may have been separated, create new intervals. + LI->RenumberValues(LIS); + ConnectedVNInfoEqClasses ConEQ(LIS); + unsigned NumComp = ConEQ.Classify(LI); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); + SmallVector<LiveInterval*, 8> Dups(1, LI); + for (unsigned i = 1; i != NumComp; ++i) { + Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + if (delegate_) + delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + } + ConEQ.Distribute(&Dups[0], MRI); + } } +void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + LiveIntervals &LIS, + const MachineLoopInfo &Loops) { + VirtRegAuxInfo VRAI(MF, LIS, Loops); + for (iterator I = begin(), E = end(); I != E; ++I) { + LiveInterval &LI = **I; + VRAI.CalculateRegClass(LI.reg); + VRAI.CalculateWeightAndHint(LI); + } +} diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 73f69ed63983..14d227e61957 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -25,13 +25,36 @@ namespace llvm { class AliasAnalysis; class LiveIntervals; +class MachineLoopInfo; class MachineRegisterInfo; class VirtRegMap; class LiveRangeEdit { +public: + /// Callback methods for LiveRangeEdit owners. + struct Delegate { + /// Called immediately before erasing a dead machine instruction. + virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} + + /// Called when a virtual register is no longer used. Return false to defer + /// its deletion from LiveIntervals. + virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } + + /// Called before shrinking the live range of a virtual register. + virtual void LRE_WillShrinkVirtReg(unsigned) {} + + /// Called after cloning a virtual register. + /// This is used for new registers representing connected components of Old. + virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} + + virtual ~Delegate() {} + }; + +private: LiveInterval &parent_; SmallVectorImpl<LiveInterval*> &newRegs_; - const SmallVectorImpl<LiveInterval*> &uselessRegs_; + Delegate *const delegate_; + const SmallVectorImpl<LiveInterval*> *uselessRegs_; /// firstNew_ - Index of the first register added to newRegs_. const unsigned firstNew_; @@ -41,11 +64,11 @@ class LiveRangeEdit { /// remattable_ - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet<VNInfo*,4> remattable_; + SmallPtrSet<const VNInfo*,4> remattable_; /// rematted_ - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet<VNInfo*,4> rematted_; + SmallPtrSet<const VNInfo*,4> rematted_; /// scanRemattable - Identify the parent_ values that may rematerialize. void scanRemattable(LiveIntervals &lis, @@ -57,6 +80,11 @@ class LiveRangeEdit { bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx, LiveIntervals &lis); + /// foldAsLoad - If LI has a single use and a single def that can be folded as + /// a load, eliminate the register by folding the def into the use. + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, + MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -66,9 +94,13 @@ public: /// rematerializing values because they are about to be removed. LiveRangeEdit(LiveInterval &parent, SmallVectorImpl<LiveInterval*> &newRegs, - const SmallVectorImpl<LiveInterval*> &uselessRegs) - : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs), - firstNew_(newRegs.size()), scannedRemattable_(false) {} + Delegate *delegate = 0, + const SmallVectorImpl<LiveInterval*> *uselessRegs = 0) + : parent_(parent), newRegs_(newRegs), + delegate_(delegate), + uselessRegs_(uselessRegs), + firstNew_(newRegs.size()), + scannedRemattable_(false) {} LiveInterval &getParent() const { return parent_; } unsigned getReg() const { return parent_.reg; } @@ -81,16 +113,33 @@ public: bool empty() const { return size() == 0; } LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - /// create - Create a new register with the same class and stack slot as + /// FIXME: Temporary accessors until we can get rid of + /// LiveIntervals::AddIntervalsForSpills + SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; } + const SmallVectorImpl<LiveInterval*> *getUselessVRegs() { + return uselessRegs_; + } + + /// createFrom - Create a new virtual register based on OldReg. + LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); + + /// create - Create a new register with the same class and original slot as /// parent. - LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&); + LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { + return createFrom(getReg(), LIS, VRM); + } /// anyRematerializable - Return true if any parent values may be /// rematerializable. - /// This function must be called before ny rematerialization is attempted. + /// This function must be called before any rematerialization is attempted. bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, AliasAnalysis*); + /// checkRematerializable - Manually add VNI to the list of rematerializable + /// values if DefMI may be rematerializable. + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, + const TargetInstrInfo&, AliasAnalysis*); + /// Remat - Information needed to rematerialize at a specific location. struct Remat { VNInfo *ParentVNI; // parent_'s value at the remat location. @@ -116,18 +165,35 @@ public: const Remat &RM, LiveIntervals&, const TargetInstrInfo&, - const TargetRegisterInfo&); + const TargetRegisterInfo&, + bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. - void markRematerialized(VNInfo *ParentVNI) { + void markRematerialized(const VNInfo *ParentVNI) { rematted_.insert(ParentVNI); } /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(VNInfo *ParentVNI) const { + bool didRematerialize(const VNInfo *ParentVNI) const { return rematted_.count(ParentVNI); } + + /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try + /// to erase it from LIS. + void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); + + /// eliminateDeadDefs - Try to delete machine instructions that are now dead + /// (allDefsAreDead returns true). This may cause live intervals to be trimmed + /// and further dead efs to be eliminated. + void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + LiveIntervals&, VirtRegMap&, + const TargetInstrInfo&); + + /// calculateRegClassAndHint - Recompute register class and hint for each new + /// register. + void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, + const MachineLoopInfo&); }; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index dd43ef2530c1..20bad60dedda 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -107,9 +107,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); - for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(), - E = MBB->pred_rend(); PI != E; ++PI) - WorkList.push_back(*PI); + WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, @@ -707,7 +705,7 @@ bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { // Loop over all of the successors of the basic block, checking to see if // the value is either live in the block, or if it is killed in the block. - std::vector<MachineBasicBlock*> OpSuccBlocks; + SmallVector<MachineBasicBlock*, 8> OpSuccBlocks; for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), E = MBB.succ_end(); SI != E; ++SI) { MachineBasicBlock *SuccMBB = *SI; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ccbff0af5b2c..57f3e34d0c5a 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -363,8 +363,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { } void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - std::vector<MachineBasicBlock *>::iterator I = - std::find(Predecessors.begin(), Predecessors.end(), pred); + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -402,8 +401,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - std::vector<MachineBasicBlock *>::const_iterator I = - std::find(Successors.begin(), Successors.end(), MBB); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); return I != Successors.end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 07a7d27b019f..f97ccf65790f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!FoundCSE) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; @@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (NewMI) { Commuted = true; FoundCSE = VNT.count(NewMI); - if (NewMI != MI) + if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - else if (!FoundCSE) + Changed = true; + } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! (void)TII->commuteInstruction(MI); } @@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumPhysCSEs; if (Commuted) ++NumCommutes; + Changed = true; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index aa9ea61acec7..71df6f8b7701 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -441,6 +441,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print nontemporal info. + if (MMO.isNonTemporal()) + OS << "(nontemporal)"; + return OS; } @@ -451,7 +455,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -470,7 +475,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// implicit operands. It reserves space for the number of operands specified by /// the TargetInstrDesc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -484,8 +489,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -499,7 +504,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// that the MachineInstr is created and added to the end of the specified /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -514,8 +519,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -528,7 +533,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -538,6 +543,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; + // Copy all the flags. + Flags = MI.Flags; + // Set parent to null. Parent = 0; @@ -1417,6 +1425,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; + if (Flags) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + } + if (!memoperands_empty()) { if (!HaveSemi) OS << ";"; HaveSemi = true; @@ -1447,13 +1463,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } } + // Print debug location information. if (!debugLoc.isUnknown() && MF) { - if (!HaveSemi) OS << ";"; + if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); } - OS << "\n"; + OS << '\n'; } bool MachineInstr::addRegisterKilled(unsigned IncomingReg, @@ -1530,13 +1547,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, continue; if (Reg == IncomingReg) { - if (!Found) { - if (MO.isDead()) - // The register is already marked dead. - return true; - MO.setIsDead(); - Found = true; - } + MO.setIsDead(); + Found = true; } else if (hasAliases && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(Reg)) { // There exists a super-register that's marked dead. diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 443fc2d97bdf..b315702eef8f 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -39,7 +39,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; STATISTIC(NumHoisted, @@ -169,6 +168,10 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// HasAnyPHIUse - Return true if the specified register is used by any + /// phi node. + bool HasAnyPHIUse(unsigned Reg) const; + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered /// it 'high'. @@ -294,7 +297,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { RegLimit.resize(NumRC); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); } // Get our Loop information... @@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasPHIUses - Return true if the specified register has any PHI use. -static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { +/// HasAnyPHIUse - Return true if the specified register is used by any +/// phi node. +bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; if (UseMI->isPHI()) return true; + // Look pass copies as well. + if (UseMI->isCopy()) { + unsigned Def = UseMI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Def) && + HasAnyPHIUse(Def)) + return true; + } } return false; } - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered /// it 'high'. @@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return false; } - // If result(s) of this instruction is used by PHIs, then don't hoist it. - // The presence of joins makes it difficult for current register allocator - // implementation to perform remat. + // If result(s) of this instruction is used by PHIs outside of the loop, then + // don't hoist it if the instruction because it will introduce an extra copy. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - if (HasPHIUses(MO.getReg(), MRI)) + if (HasAnyPHIUse(MO.getReg())) return false; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 8a93a24287b6..916dff70a41e 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MI->isDebugValue()) continue; - if (PerformTrivialForwardCoalescing(MI, &MBB)) + bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); + if (Joined) { + MadeChange = true; continue; + } if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 7351119f4728..f95f4112aeda 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -402,6 +402,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, Cond)) { + // If the block branches directly to a landing pad successor, pretend that + // the landing pad is a normal block. + LandingPadSuccs.erase(TBB); + LandingPadSuccs.erase(FBB); + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { @@ -602,9 +607,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check Live Variables. if (MI->isDebugValue()) { // Liveness checks are not valid for debug values. - } else if (MO->isUndef()) { - // An <undef> doesn't refer to any register, so just skip it. - } else if (MO->isUse()) { + } else if (MO->isUse() && !MO->isUndef()) { regsLiveInButUnused.erase(Reg); bool isKill = false; @@ -612,13 +615,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { // A two-addr use counts as a kill if use and def are the same. unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) { + if (Reg == DefReg) isKill = true; - // And in that case an explicit kill flag is not allowed. - if (MO->isKill()) - report("Illegal kill flag on two-address instruction operand", - MO, MONum); - } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { report("Two-address instruction operands must be identical", MO, MONum); } @@ -675,8 +674,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); } } - } else { - assert(MO->isDef()); + } else if (MO->isDef()) { // Register defined. // TODO: verify that earlyclobber ops are not used. if (MO->isDead()) diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 5f7cf582c960..af65f13bf065 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -28,12 +28,17 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include <algorithm> -#include <map> using namespace llvm; +static cl::opt<bool> +DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, cl::desc("Disable critical edge splitting " + "during PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -105,10 +110,12 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; // Split critical edges to help the coalescer - if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + if (!DisableEdgeSplitting) { + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } } // Populate VRegPHIUseCount diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 3489db2e9f4f..315aedddb9ef 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } + // This forces linking of the linear scan register allocator, + // so -regalloc=linearscan still works in clang. + if (Ctor == createLinearScanRegisterAllocator) + return createLinearScanRegisterAllocator(); + if (Ctor != createDefaultRegisterAllocator) return Ctor(); @@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { case CodeGenOpt::None: return createFastRegisterAllocator(); default: - return createLinearScanRegisterAllocator(); + return createGreedyRegisterAllocator(); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 5d7123caa017..c105bb06ebe5 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -30,6 +30,15 @@ // If the "sub" instruction all ready sets (or could be modified to set) the // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction. +// +// - Optimize Bitcast pairs: +// +// v1 = bitcast v0 +// v2 = bitcast v1 +// = v2 +// => +// v1 = bitcast v0 +// = v0 // //===----------------------------------------------------------------------===// @@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); +STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate foled"); namespace { @@ -85,6 +95,7 @@ namespace { } private: + bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); @@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } +/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast +/// a value cross register classes), and the source is defined by another +/// bitcast instruction B. And if the register class of source of B matches +/// the register class of instruction A, then it is legal to replace all uses +/// of the def of A with source of B. e.g. +/// %vreg0<def> = VMOVSR %vreg1 +/// %vreg3<def> = VMOVRS %vreg0 +/// Replace all uses of vreg3 with vreg1. + +bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + + unsigned Def = 0; + unsigned Src = 0; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Def = Reg; + else if (Src) + // Multiple sources? + return false; + else + Src = Reg; + } + + assert(Def && Src && "Malformed bitcast instruction!"); + + MachineInstr *DefMI = MRI->getVRegDef(Src); + if (!DefMI || !DefMI->getDesc().isBitcast()) + return false; + + unsigned SrcDef = 0; + unsigned SrcSrc = 0; + NumDefs = DefMI->getDesc().getNumDefs(); + NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = DefMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + SrcDef = Reg; + else if (SrcSrc) + // Multiple sources? + return false; + else + SrcSrc = Reg; + } + + if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) + return false; + + MRI->replaceRegWith(Def, SrcSrc); + MRI->clearKillFlags(SrcSrc); + MI->eraseFromParent(); + ++NumBitcasts; + return true; +} + /// OptimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB){ + MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. unsigned SrcReg; @@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, // Attempt to optimize the comparison instruction. if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { - ++NumEliminated; + ++NumCmps; return true; } @@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->getDesc().isCompare()) { + const TargetInstrDesc &TID = MI->getDesc(); + + if (TID.isBitcast()) { + if (OptimizeBitcastInstr(MI, MBB)) { + // MI is deleted. + Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } else if (TID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. Changed = true; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 9cd9941e56b3..c04d65637c94 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -47,7 +47,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, unsigned OpIdx, - const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { switch(OpIdx) { case 1: @@ -61,7 +60,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, } static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - const TargetInstrInfo *tii_, SmallSet<unsigned, 8> &ImpDefRegs) { if (MI->isCopy()) { MachineOperand &MO0 = MI->getOperand(0); @@ -86,11 +84,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool Changed = false; - const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo(); - const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo(); - MachineRegisterInfo *mri_ = &fn.getRegInfo(); - - LiveVariables *lv_ = &getAnalysis<LiveVariables>(); + TII = fn.getTarget().getInstrInfo(); + TRI = fn.getTarget().getRegisterInfo(); + MRI = &fn.getRegInfo(); + LV = &getAnalysis<LiveVariables>(); SmallSet<unsigned, 8> ImpDefRegs; SmallVector<MachineInstr*, 8> ImpDefMIs; @@ -113,7 +110,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } ImpDefMIs.push_back(MI); @@ -125,7 +122,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } MI->eraseFromParent(); @@ -145,14 +142,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. - if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) { + if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { bool isKill = MO.isKill(); - MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(MI); } ChangedToImpDef = true; @@ -210,8 +207,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // uses. bool Skip = false; SmallVector<MachineInstr*, 4> DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), - DE = mri_->def_end(); DI != DE; ++DI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; if (!DeadImpDef->isImplicitDef()) { Skip = true; @@ -229,8 +226,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { if (UI.getOperand().isUndef()) continue; MachineInstr *RMI = &*UI; @@ -242,8 +239,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. - if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) { - RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + if (isUndefCopy(RMI, Reg, ImpDefRegs)) { + RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector<unsigned, 4> Ops; @@ -263,15 +260,15 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Update LiveVariables varinfo if the instruction is a kill. if (isKill) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } continue; } // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = mri_->getRegClass(Reg); - unsigned NewVReg = mri_->createVirtualRegister(RC); + const TargetRegisterClass* RC = MRI->getRegClass(Reg); + unsigned NewVReg = MRI->createVirtualRegister(RC); bool isKill = true; for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { MachineOperand &RRMO = RMI->getOperand(j); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index ad7b6e4aa97f..f1f3c9969cc8 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -559,7 +559,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -641,7 +642,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || + !RegInfo->useFPForScavengingIndex(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -811,7 +813,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -842,10 +843,8 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { } } - if (DoIncr) { - RS->forward(I); - ++I; - } + RS->forward(I); + ++I; } } } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index b655dda41153..7f75f65167a3 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -26,7 +26,7 @@ and then "merge" mul and mov: sxth r3, r3 mla r4, r3, lr, r4 -It also increase the likelyhood the store may become dead. +It also increase the likelihood the store may become dead. //===---------------------------------------------------------------------===// @@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// -Stack coloring improvments: +Stack coloring improvements: 1. Do proper LiveStackAnalysis on all stack objects including those which are not spill slots. diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 5af0ce79acf7..f431d5a5a026 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -61,6 +61,11 @@ class LiveVirtRegQueue; /// assignment order. class RegAllocBase { LiveIntervalUnion::Allocator UnionAllocator; + + // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual + // registers may have changed. + unsigned UserTag; + protected: // Array of LiveIntervalUnions indexed by physical register. class LiveUnionArray { @@ -92,7 +97,7 @@ protected: // query on a new live virtual register. OwningArrayPtr<LiveIntervalUnion::Query> Queries; - RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -104,7 +109,7 @@ protected: // before querying a new live virtual register. This ties Queries and // PhysReg2LiveUnion together. LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); return Queries[PhysReg]; } diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6923908a32d9..d92d80f181fc 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "LiveDebugVariables.h" #include "LiveIntervalUnion.h" +#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "RenderMachineFunction.h" #include "Spiller.h" @@ -136,6 +138,7 @@ char RABasic::ID = 0; } // end anonymous namespace RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); @@ -154,6 +157,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive<RegisterCoalescer>(); @@ -230,9 +235,12 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; - PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); - // Cache an interferece query for each physical reg - Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } } void RegAllocBase::LiveUnionArray::clear() { @@ -246,13 +254,15 @@ void RegAllocBase::LiveUnionArray::clear() { } void RegAllocBase::releaseMemory() { - PhysReg2LiveUnion.clear(); + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; @@ -268,6 +278,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << '\n'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); PhysReg2LiveUnion[PhysReg].unify(VirtReg); ++NumAssigned; } @@ -288,6 +299,18 @@ void RegAllocBase::allocatePhysRegs() { // Continue assigning vregs one at a time to available physical registers. while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + ++UserTag; + // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. @@ -304,7 +327,12 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { LiveInterval *SplitVirtReg = *I; - if (SplitVirtReg->empty()) continue; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); @@ -344,7 +372,8 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the // contents valid until we're done accessing pendingSpills. @@ -381,29 +410,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // Add newly allocated physical registers to the MBB live in sets. void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - typedef SmallVector<MachineBasicBlock*, 8> MBBVec; - MBBVec liveInMBBs; - MachineBasicBlock &entryMBB = *MF->begin(); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + LiveIntervalUnion::SegmentIter SI; for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; - for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); - ++SI) { - - // Find the set of basic blocks which this range is live into... - liveInMBBs.clear(); - if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; - - // And add the physreg for this interval to their live-in sets. - for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); - I != E; ++I) { - MachineBasicBlock *MBB = *I; - if (MBB == &entryMBB) continue; - if (MBB->isLiveIn(PhysReg)) continue; - MBB->addLiveIn(PhysReg); - } + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); } } } @@ -469,9 +500,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } // No other spill candidates were found, so spill the current VirtReg. DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); - SmallVector<LiveInterval*, 1> pendingSpills; - - spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, SplitVRegs); + spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -490,7 +520,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { ReservedRegs = TRI->getReservedRegs(*MF); - SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); @@ -525,6 +555,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Run rewriter VRM->rewrite(LIS->getSlotIndexes()); + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 15036e38b893..b2fd6e092ce6 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -97,7 +97,7 @@ namespace { // immediately without checking aliases. regFree, - // A reserved register has been assigned expolicitly (e.g., setting up a + // A reserved register has been assigned explicitly (e.g., setting up a // call parameter), and it remains reserved until it is used. regReserved @@ -396,7 +396,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -420,20 +419,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) + if (UsedInInstr.test(PhysReg)) { + DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); return spillImpossible; + } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: + DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " + << PhysReg << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } - // This is a disabled register, add up const of aliases. + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -511,9 +515,14 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - if (!Allocatable.test(*I)) + if (!Allocatable.test(*I)) { + DEBUG(dbgs() << "\tRegister " << *I << " is not allocatable.\n"); continue; + } unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. if (Cost == 0) return assignVirtToPhysReg(LRE, *I); @@ -722,9 +731,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); } // Also mark PartialDefs as used to avoid reallocation. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 406485aaf496..7c461d8ea787 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -14,7 +14,8 @@ #define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" -#include "LiveIntervalUnion.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" #include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" @@ -49,14 +50,16 @@ using namespace llvm; STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); -STATISTIC(NumReassigned, "Number of interferences reassigned"); STATISTIC(NumEvicted, "Number of interferences evicted"); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { -class RAGreedy : public MachineFunctionPass, public RegAllocBase { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // context MachineFunction *MF; BitVector ReservedRegs; @@ -72,14 +75,73 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { // state std::auto_ptr<Spiller> SpillerInstance; - std::auto_ptr<SplitAnalysis> SA; std::priority_queue<std::pair<unsigned, unsigned> > Queue; - IndexedMap<unsigned, VirtReg2IndexFunctor> Generation; + + // Live ranges pass through a number of stages as we try to allocate them. + // Some of the stages may also create new live ranges: + // + // - Region splitting. + // - Per-block splitting. + // - Local splitting. + // - Spilling. + // + // Ranges produced by one of the stages skip the previous stages when they are + // dequeued. This improves performance because we can skip interference checks + // that are unlikely to give any results. It also guarantees that the live + // range splitting algorithm terminates, something that is otherwise hard to + // ensure. + enum LiveRangeStage { + RS_New, ///< Never seen before. + RS_First, ///< First time in the queue. + RS_Second, ///< Second time in the queue. + RS_Global, ///< Produced by global splitting. + RS_Local, ///< Produced by local splitting. + RS_Spill ///< Produced by spilling. + }; + + IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage; + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return LiveRangeStage(LRStage[VirtReg.reg]); + } + + template<typename Iterator> + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + LRStage.resize(MRI->getNumVirtRegs()); + for (;Begin != End; ++Begin) { + unsigned Reg = (*Begin)->reg; + if (LRStage[Reg] == RS_New) + LRStage[Reg] = NewStage; + } + } // splitting state. + std::auto_ptr<SplitAnalysis> SA; + std::auto_ptr<SplitEditor> SE; - /// All basic blocks where the current register is live. - SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints; + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. + SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + unsigned PhysReg; + BitVector LiveBundles; + SmallVector<unsigned, 8> ActiveBlocks; + + void reset(unsigned Reg) { + PhysReg = Reg; + LiveBundles.clear(); + ActiveBlocks.clear(); + } + }; + + /// Candidate info for for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector<GlobalSplitCandidate, 32> GlobalCand; /// For every instruction in SA->UseSlots, store the previous non-copy /// instruction. @@ -108,42 +170,50 @@ public: static char ID; private: - bool checkUncachedInterference(LiveInterval&, unsigned); - LiveInterval *getSingleInterference(LiveInterval&, unsigned); - bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg); - float calcInterferenceWeight(LiveInterval&, unsigned); - float calcInterferenceInfo(LiveInterval&, unsigned); - float calcGlobalSplitCost(const BitVector&); - void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + void LRE_WillEraseInstruction(MachineInstr*); + bool LRE_CanEraseVirtReg(unsigned); + void LRE_WillShrinkVirtReg(unsigned); + void LRE_DidCloneVirtReg(unsigned, unsigned); + + float calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, float&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); + float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl<LiveInterval*>&); void calcGapWeights(unsigned, SmallVectorImpl<float>&); SlotIndex getPrevMappedIndex(const MachineInstr*); void calcPrevSlots(); unsigned nextSplitPoint(unsigned); - bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&); + bool canEvictInterference(LiveInterval&, unsigned, float&); - unsigned tryReassign(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); + SmallVectorImpl<LiveInterval*>&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl<LiveInterval*>&); - unsigned trySpillInterferences(LiveInterval&, AllocationOrder&, - SmallVectorImpl<LiveInterval*>&); }; } // end anonymous namespace char RAGreedy::ID = 0; +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons. +const float Hysteresis = 0.98f; + + FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); @@ -166,6 +236,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LiveIntervals>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive<RegisterCoalescer>(); @@ -185,9 +257,49 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } + +//===----------------------------------------------------------------------===// +// LiveRangeEdit delegate methods +//===----------------------------------------------------------------------===// + +void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { + // LRE itself will remove from SlotIndexes and parent basic block. + VRM->RemoveMachineInstrFromMaps(MI); +} + +bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (unsigned PhysReg = VRM->getPhys(VirtReg)) { + unassign(LIS->getInterval(VirtReg), PhysReg); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg); + if (!PhysReg) + return; + + // Register is assigned, put it back on the queue for reassignment. + LiveInterval &LI = LIS->getInterval(VirtReg); + unassign(LI, PhysReg); + enqueue(&LI); +} + +void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // LRE may clone a virtual register because dead code elimination causes it to + // be split into connected components. Ensure that the new register gets the + // same stage as the parent. + LRStage.grow(New); + LRStage[New] = LRStage[Old]; +} + void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - Generation.clear(); + LRStage.clear(); + GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -198,20 +310,26 @@ void RAGreedy::enqueue(LiveInterval *LI) { const unsigned Reg = LI->reg; assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); - const unsigned Hint = VRM->getRegAllocPref(Reg); unsigned Prio; - Generation.grow(Reg); - if (++Generation[Reg] == 1) - // 1st generation ranges are handled first, long -> short. + LRStage.grow(Reg); + if (LRStage[Reg] == RS_New) + LRStage[Reg] = RS_First; + + if (LRStage[Reg] == RS_Second) + // Unsplit ranges that couldn't be allocated immediately are deferred until + // everything else has been allocated. Long ranges are allocated last so + // they are split against realistic interference. + Prio = (1u << 31) - Size; + else { + // Everything else is allocated in long->short order. Long ranges that don't + // fit should be spilled ASAP so they don't create interference. Prio = (1u << 31) + Size; - else - // Repeat offenders are handled second, short -> long - Prio = (1u << 30) - Size; - // Boost ranges that have a physical register hint. - if (TargetRegisterInfo::isPhysicalRegister(Hint)) - Prio |= (1u << 30); + // Boost ranges that have a physical register hint. + if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg))) + Prio |= (1u << 30); + } Queue.push(std::make_pair(Prio, Reg)); } @@ -224,97 +342,34 @@ LiveInterval *RAGreedy::dequeue() { return LI; } + //===----------------------------------------------------------------------===// -// Register Reassignment +// Direct Assignment //===----------------------------------------------------------------------===// -// Check interference without using the cache. -bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]); - if (subQ.checkInterference()) - return true; - } - return false; -} - -/// getSingleInterference - Return the single interfering virtual register -/// assigned to PhysReg. Return 0 if more than one virtual register is -/// interfering. -LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - // Check physreg and aliases. - LiveInterval *Interference = 0; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - if (Q.checkInterference()) { - if (Interference) - return 0; - if (Q.collectInterferingVRegs(2) > 1) - return 0; - Interference = Q.interferingVRegs().front(); - } - } - return Interference; -} - -// Attempt to reassign this virtual register to a different physical register. -// -// FIXME: we are not yet caching these "second-level" interferences discovered -// in the sub-queries. These interferences can change with each call to -// selectOrSplit. However, we could implement a "may-interfere" cache that -// could be conservatively dirtied when we reassign or split. -// -// FIXME: This may result in a lot of alias queries. We could summarize alias -// live intervals in their parent register's live union, but it's messy. -bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, - unsigned WantedPhysReg) { - assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) && - "Can only reassign virtual registers"); - assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) && - "inconsistent phys reg assigment"); - - AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - // Don't reassign to a WantedPhysReg alias. - if (TRI->regsOverlap(PhysReg, WantedPhysReg)) - continue; - - if (checkUncachedInterference(InterferingVReg, PhysReg)) - continue; +/// tryAssign - Try to assign VirtReg to an available register. +unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + Order.rewind(); + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!checkPhysRegInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint(PhysReg)) + return PhysReg; - // Reassign the interfering virtual reg to this physical reg. - unsigned OldAssign = VRM->getPhys(InterferingVReg.reg); - DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " << - TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n'); - unassign(InterferingVReg, OldAssign); - assign(InterferingVReg, PhysReg); - ++NumReassigned; - return true; - } - return false; -} + // PhysReg is available. Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); -/// tryReassign - Try to reassign a single interference to a different physreg. -/// @param VirtReg Currently unassigned virtual register. -/// @param Order Physregs to try. -/// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs){ - NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; - Order.rewind(); - while (unsigned PhysReg = Order.next()) { - LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); - if (!InterferingVReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg)) - continue; - if (reassignVReg(*InterferingVReg, PhysReg)) - return PhysReg; - } - return 0; + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; } @@ -323,22 +378,24 @@ unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// /// canEvict - Return true if all interferences between VirtReg and PhysReg can -/// be evicted. Set maxWeight to the maximal spill weight of an interference. +/// be evicted. +/// Return false if any interference is heavier than MaxWeight. +/// On return, set MaxWeight to the maximal spill weight of an interference. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - unsigned Size, float &MaxWeight) { + float &MaxWeight) { float Weight = 0; for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - // If there is 10 or more interferences, chances are one is smaller. - if (Q.collectInterferingVRegs(10) >= 10) + // If there is 10 or more interferences, chances are one is heavier. + if (Q.collectInterferingVRegs(10, MaxWeight) >= 10) return false; - // CHeck if any interfering live range is shorter than VirtReg. - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; - if (Intf->getSize() <= Size) + if (Intf->weight >= MaxWeight) return false; Weight = std::max(Weight, Intf->weight); } @@ -353,25 +410,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs){ + SmallVectorImpl<LiveInterval*> &NewVRegs, + unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); - // We can only evict interference if all interfering registers are virtual and - // longer than VirtReg. - const unsigned Size = VirtReg.getSize(); - // Keep track of the lightest single interference seen so far. - float BestWeight = 0; + float BestWeight = VirtReg.weight; unsigned BestPhys = 0; Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; - if (!canEvictInterference(VirtReg, PhysReg, Size, Weight)) + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a register in a function has cost 1. + if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) + continue; + + float Weight = BestWeight; + if (!canEvictInterference(VirtReg, PhysReg, Weight)) continue; // This is an eviction candidate. - DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " << Weight << '\n'); if (BestPhys && Weight >= BestWeight) continue; @@ -406,201 +466,228 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Region Splitting //===----------------------------------------------------------------------===// -/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints -/// when considering interference from PhysReg. Also compute an optimistic local -/// cost of this interference pattern. -/// -/// The final cost of a split is the local cost + global cost of preferences -/// broken by SpillPlacement. -/// -float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { +/// addSplitConstraints - Fill out the SplitConstraints vector based on the +/// interference pattern in Physreg and its aliases. Add the constraints to +/// SpillPlacement and return the static cost of this split in Cost, assuming +/// that all preferences in SplitConstraints are met. +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + // Reset interference dependent info. - SpillConstraints.resize(SA->LiveBlocks.size()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SplitConstraints.resize(UseBlocks.size()); + float StaticCost = 0; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + BC.Number = BI.MBB->getNumber(); - BC.Entry = (BI.Uses && BI.LiveIn) ? - SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.Exit = (BI.Uses && BI.LiveOut) ? - SpillPlacement::PrefReg : SpillPlacement::DontCare; - BI.OverlapEntry = BI.OverlapExit = false; - } + Intf.moveToBlock(BC.Number); + BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - // Add interference info from each PhysReg alias. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) + if (!Intf.hasInterference()) continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) - continue; - - // Determine which blocks have interference live in or after the last split - // point. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; - - // Is the interference live-in? - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() <= Start) - BC.Entry = SpillPlacement::MustSpill; - } + // Number of spill code instructions to insert. + unsigned Ins = 0; + + // Interference for the live-in value. + if (BI.LiveIn) { + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) + BC.Entry = SpillPlacement::MustSpill, ++Ins; + else if (Intf.first() < BI.FirstUse) + BC.Entry = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + ++Ins; + } - // Is the interference overlapping the last split point? - if (BI.LiveOut) { - if (IntI.stop() < BI.LastSplitPoint) - IntI.advanceTo(BI.LastSplitPoint.getPrevSlot()); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) - BC.Exit = SpillPlacement::MustSpill; - } + // Interference for the live-out value. + if (BI.LiveOut) { + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) + BC.Exit = SpillPlacement::MustSpill, ++Ins; + else if (Intf.last() > BI.LastUse) + BC.Exit = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + ++Ins; } - // Rewind iterator and check other interferences. - IntI.find(VirtReg.beginIndex()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Accumulate the total frequency of inserted spill code. + if (Ins) + StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + Cost = StaticCost; - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; + // Add constraints for use-blocks. Note that these are the only constraints + // that may add a positive bias, it is downhill from here. + SpillPlacer->addConstraints(SplitConstraints); + return SpillPlacer->scanActiveBundles(); +} - // Handle transparent blocks with interference separately. - // Transparent blocks never incur any fixed cost. - if (BI.LiveThrough && !BI.Uses) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) - continue; - if (BC.Entry != SpillPlacement::MustSpill) - BC.Entry = SpillPlacement::PrefSpill; - if (BC.Exit != SpillPlacement::MustSpill) - BC.Exit = SpillPlacement::PrefSpill; - continue; +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. +void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { + const unsigned GroupSize = 8; + SpillPlacement::BlockConstraint BCS[GroupSize]; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; + + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; + Intf.moveToBlock(Number); + + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); + T = 0; } + continue; + } - // Now we only have blocks with uses left. - // Check if the interference overlaps the uses. - assert(BI.Uses && "Non-transparent block without any uses"); - - // Check interference on entry. - if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - // Not live in, but before the first use. - if (IntI.start() < BI.FirstUse) { - BC.Entry = SpillPlacement::PrefSpill; - // If the block contains a kill from an earlier split, never split - // again in the same block. - if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill)) - BC.Entry = SpillPlacement::MustSpill; - } - } + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + + if (++B == GroupSize) { + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + B = 0; + } + } - // Does interference overlap the uses in the entry segment - // [FirstUse;Kill)? - if (BI.LiveIn && !BI.OverlapEntry) { - IntI.advanceTo(BI.FirstUse); - if (!IntI.valid()) - break; - // A live-through interval has no kill. - // Check [FirstUse;LastUse) instead. - if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) - BI.OverlapEntry = true; - } + ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); + SpillPlacer->addConstraints(Array); + SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T)); +} - // Does interference overlap the uses in the exit segment [Def;LastUse)? - if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) { - IntI.advanceTo(BI.Def); - if (!IntI.valid()) - break; - if (IntI.start() < BI.LastUse) - BI.OverlapExit = true; - } +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif - // Check interference on exit. - if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) { - // Check interference between LastUse and Stop. - if (BC.Exit != SpillPlacement::PrefSpill) { - IntI.advanceTo(BI.LastUse); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) { - BC.Exit = SpillPlacement::PrefSpill; - // Avoid splitting twice in the same block. - if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def)) - BC.Exit = SpillPlacement::MustSpill; - } - } + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + if (NewBundles.empty()) + break; + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif } } + // Any new blocks to add? + if (ActiveBlocks.size() > AddedTo) { + ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); + } + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); } + DEBUG(dbgs() << ", v=" << Visited); +} - // Accumulate a local cost of this interference pattern. - float LocalCost = 0; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - if (!BI.Uses) - continue; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; - - // Do we need spill code for the entry segment? - if (BI.LiveIn) - Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg; - - // For the exit segment? - if (BI.LiveOut) - Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg; - - // The local cost of spill code in this block is the block frequency times - // the number of spill instructions inserted. - if (Inserts) - LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB); +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +float RAGreedy::calcSpillCost() { + float Cost = 0; + const LiveInterval &LI = SA->getParent(); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(Number); + LiveInterval::const_iterator I = LI.find(Start); + assert(I != LI.end() && "Expected live-in value"); + // Is there a different live-out value? If so, we need an extra spill + // instruction. + if (I->end < Stop) + Cost += SpillPlacer->getBlockFrequency(Number); + } } - DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = " - << LocalCost << '\n'); - return LocalCost; + return Cost; } /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the -/// interference pattern in SpillConstraints. +/// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; - for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) { - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; - // Broken entry preference? - Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] != - (BC.Entry == SpillPlacement::PrefReg); - // Broken exit preference? - Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] != - (BC.Exit == SpillPlacement::PrefReg); - if (Inserts) - GlobalCost += - Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB); + const BitVector &LiveBundles = Cand.LiveBundles; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + unsigned Ins = 0; + + if (BI.LiveIn) + Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); + if (BI.LiveOut) + Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + if (Ins) + GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + if (!RegIn && !RegOut) + continue; + if (RegIn && RegOut) { + // We need double spill code if this block has interference. + Intf.moveToBlock(Number); + if (Intf.hasInterference()) + GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + continue; + } + // live-in / stack-out or stack-in live-out. + GlobalCost += SpillPlacer->getBlockFrequency(Number); } - DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n'); return GlobalCost; } @@ -611,113 +698,74 @@ float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. /// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl<LiveInterval*> &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - // First compute interference ranges in the live blocks. - typedef std::pair<SlotIndex, SlotIndex> IndexPair; - SmallVector<IndexPair, 8> InterferenceRanges; - InterferenceRanges.resize(SA->LiveBlocks.size()); - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) - continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) - continue; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - IndexPair &IP = InterferenceRanges[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; - - // First interference in block. - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) - continue; - if (!IP.first.isValid() || IntI.start() < IP.first) - IP.first = IntI.start(); - } - - // Last interference in block. - if (BI.LiveOut) { - IntI.advanceTo(Stop); - if (!IntI.valid() || IntI.start() >= Stop) - --IntI; - if (IntI.stop() <= Start) - continue; - if (!IP.second.isValid() || IntI.stop() > IP.second) - IP.second = IntI.stop(); - } - } - } - - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); // Create the main cross-block interval. - SE.openIntv(); + const unsigned MainIntv = SE->openIntv(); // First add all defs that are live out of a block. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Create separate intervals for isolated blocks with multiple uses. + if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + continue; + } + // Should the register be live out? if (!BI.LiveOut || !RegOut) continue; - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " intf [" << IP.first << ';' << IP.second << ')'); + << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); // The interference interval should either be invalid or overlap MBB. - assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference"); - assert((!IP.second.isValid() || IP.second > Start) && "Bad interference"); + assert((!Intf.hasInterference() || Intf.first() < Stop) + && "Bad interference"); + assert((!Intf.hasInterference() || Intf.last() > Start) + && "Bad interference"); // Check interference leaving the block. - if (!IP.second.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference. - DEBUG(dbgs() << ", no uses" - << (RegIn ? ", live-through.\n" : ", stack in.\n")); - if (!RegIn) - SE.enterIntvAtEnd(*BI.MBB); - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", not live-through.\n"); - SE.useIntv(SE.enterIntvBefore(BI.Def), Stop); + SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); continue; } if (!RegIn) { // Block is live-through, but entry bundle is on the stack. // Reload just before the first use. DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); continue; } DEBUG(dbgs() << ", live-through.\n"); @@ -725,53 +773,45 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Block has interference. - DEBUG(dbgs() << ", interference to " << IP.second); + DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && IP.second <= BI.Def) { + if (!BI.LiveThrough && Intf.last() <= BI.Def) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE.useIntv(BI.Def, Stop); + SE->useIntv(BI.Def, Stop); continue; } - - if (!BI.Uses) { - // No uses in block, avoid interference by reloading as late as possible. - DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - continue; - } - - if (IP.second.getBoundaryIndex() < BI.LastUse) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); + if (Intf.last().getBoundaryIndex() < BI.LastUse) { // There are interference-free uses at the end of the block. // Find the first use that can get the live-out register. SmallVectorImpl<SlotIndex>::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.second.getBoundaryIndex()); + Intf.last().getBoundaryIndex()); assert(UI != SA->UseSlots.end() && "Couldn't find last use"); SlotIndex Use = *UI; assert(Use <= BI.LastUse && "Couldn't find last use"); // Only attempt a split befroe the last split point. - if (Use.getBaseIndex() <= BI.LastSplitPoint) { + if (Use.getBaseIndex() <= LastSplitPoint) { DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE.enterIntvBefore(Use); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - assert(SegStart < BI.LastSplitPoint && "Impossible split point"); - SE.useIntv(SegStart, Stop); + SlotIndex SegStart = SE->enterIntvBefore(Use); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); + assert(SegStart < LastSplitPoint && "Impossible split point"); + SE->useIntv(SegStart, Stop); continue; } } // Interference is after the last use. DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); + SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); } // Now all defs leading to live bundles are handled, do everything else. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; @@ -780,152 +820,207 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, continue; // We have an incoming register. Check for interference. - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber()); + << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ')'); // Check interference entering the block. - if (!IP.first.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference. - if (RegOut) { - DEBUG(dbgs() << ", no uses, live-through.\n"); - SE.useIntv(Start, Stop); - } else { - DEBUG(dbgs() << ", no uses, stack-out.\n"); - SE.leaveIntvAtTop(*BI.MBB); - } - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", killed in block.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); continue; } if (!RegOut) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); // Block is live-through, but exit bundle is on the stack. // Spill immediately after the last use. - if (BI.LastUse < BI.LastSplitPoint) { + if (BI.LastUse < LastSplitPoint) { DEBUG(dbgs() << ", uses, stack-out.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); continue; } // The last use is after the last split point, it is probably an // indirect jump. DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << BI.LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint); - SE.useIntv(Start, SegEnd); + << LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); + SE->useIntv(Start, SegEnd); // Run a double interval from the split to the last use. // This makes it possible to spill the complement without affecting the // indirect branch. - SE.overlapIntv(SegEnd, BI.LastUse); + SE->overlapIntv(SegEnd, BI.LastUse); continue; } // Register is live-through. DEBUG(dbgs() << ", uses, live-through.\n"); - SE.useIntv(Start, Stop); + SE->useIntv(Start, Stop); continue; } // Block has interference. - DEBUG(dbgs() << ", interference from " << IP.first); + DEBUG(dbgs() << ", interference from " << Intf.first()); - if (!BI.LiveThrough && IP.first >= BI.Kill) { + if (!BI.LiveThrough && Intf.first() >= BI.Kill) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE.useIntv(Start, BI.Kill); + SE->useIntv(Start, BI.Kill); continue; } - if (!BI.Uses) { - // No uses in block, avoid interference by spilling as soon as possible. - DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - continue; - } - if (IP.first.getBaseIndex() > BI.FirstUse) { + if (Intf.first().getBaseIndex() > BI.FirstUse) { // There are interference-free uses at the beginning of the block. // Find the last use that can get the register. SmallVectorImpl<SlotIndex>::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.first.getBaseIndex()); + Intf.first().getBaseIndex()); assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); SlotIndex Use = (--UI)->getBoundaryIndex(); DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE.leaveIntvAfter(Use); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - SE.useIntv(Start, SegEnd); + SlotIndex SegEnd = SE->leaveIntvAfter(Use); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + SE->useIntv(Start, SegEnd); continue; } // Interference is before the first use. DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); } - SE.closeIntv(); + // Handle live-through blocks. + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + DEBUG(dbgs() << "Live through BB#" << Number << '\n'); + if (RegIn && RegOut) { + Intf.moveToBlock(Number); + if (!Intf.hasInterference()) { + SE->useIntv(Indexes->getMBBStartIdx(Number), + Indexes->getMBBEndIdx(Number)); + continue; + } + } + MachineBasicBlock *MBB = MF->getBlockNumbered(Number); + if (RegIn) + SE->leaveIntvAtTop(*MBB); + if (RegOut) + SE->enterIntvAtEnd(*MBB); + } - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. - SE.finish(); ++NumGlobalSplits; - if (VerifyEnabled) { - MF->verify(this, "After splitting live range around region"); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + LRStage.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumThroughBlocks() + SA->getUseBlocks().size(); + + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + unsigned Reg = LREdit.get(i)->reg; + + // Ignore old intervals from DCE. + if (LRStage[Reg] != RS_New) + continue; -#ifndef NDEBUG - // Make sure that at least one of the new intervals can allocate to PhysReg. - // That was the whole point of splitting the live range. - bool found = false; - for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; - ++I) - if (!checkUncachedInterference(**I, PhysReg)) { - found = true; - break; + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + LRStage[Reg] = RS_Global; + continue; + } + + // Main interval. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] == MainIntv) { + if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safe guard against looping. + LRStage[Reg] = RS_Global; } - assert(found && "No allocatable intervals after pointless splitting"); -#endif + continue; + } + + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE. } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around region"); } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { - BitVector LiveBundles, BestBundles; - float BestCost = 0; - unsigned BestReg = 0; + float BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + const unsigned NoCand = ~0u; + unsigned BestCand = NoCand; + Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Cost = calcInterferenceInfo(VirtReg, PhysReg); - if (BestReg && Cost >= BestCost) + for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { + if (GlobalCand.size() <= Cand) + GlobalCand.resize(Cand+1); + GlobalCand[Cand].reset(PhysReg); + + SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); + float Cost; + InterferenceCache::Cursor Intf(IntfCache, PhysReg); + if (!addSplitConstraints(Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + continue; + } + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); continue; + } + growRegion(GlobalCand[Cand], Intf); + + SpillPlacer->finish(); - SpillPlacer->placeSpills(SpillConstraints, LiveBundles); // No live bundles, defer to splitSingleBlocks(). - if (!LiveBundles.any()) + if (!GlobalCand[Cand].LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); continue; + } - Cost += calcGlobalSplitCost(LiveBundles); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = Cost; - BestBundles.swap(LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); + DEBUG({ + dbgs() << ", total = " << Cost << " with bundles"; + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = Cand; + BestCost = Hysteresis * Cost; // Prevent rounding effects. } } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); return 0; } @@ -942,8 +1037,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl<float> &GapWeight) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; const unsigned NumGaps = Uses.size()-1; @@ -1034,8 +1129,8 @@ unsigned RAGreedy::nextSplitPoint(unsigned i) { /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*> &NewVRegs) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); // Note that it is possible to have an interval that is live-in or live-out // while only covering a single block - A phi-def can use undef values from @@ -1065,7 +1160,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB); + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); SmallVector<float, 8> GapWeight; Order.rewind(); @@ -1130,13 +1225,13 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, PrevSlot[SplitBefore].distance(Uses[SplitAfter])); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. - float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1181,16 +1276,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); - SE.openIntv(); - SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]); - SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]); - SE.useIntv(SegStart, SegStop); - SE.closeIntv(); - SE.finish(); + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SE->finish(); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); ++NumLocalSplits; return 0; @@ -1205,16 +1299,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<LiveInterval*>&NewVRegs) { - SA->analyze(&VirtReg); - // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); return tryLocalSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + // Don't iterate global splitting. + // Move straight to spilling if this range was produced by a global split. + if (getStage(VirtReg) >= RS_Global) + return 0; + + SA->analyze(&VirtReg); + // First try to split around a region spanning multiple blocks. unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) @@ -1223,9 +1323,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Then isolate blocks with multiple uses. SplitAnalysis::BlockPtrSet Blocks; if (SA->getMultiUseBlocks(Blocks)) { - SmallVector<LiveInterval*, 4> SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); if (VerifyEnabled) MF->verify(this, "After splitting live range around basic blocks"); } @@ -1236,68 +1337,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// -// Spilling -//===----------------------------------------------------------------------===// - -/// calcInterferenceWeight - Calculate the combined spill weight of -/// interferences when assigning VirtReg to PhysReg. -float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){ - float Sum = 0; - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Q.collectInterferingVRegs(); - if (Q.seenUnspillableVReg()) - return HUGE_VALF; - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) - Sum += Q.interferingVRegs()[i]->weight; - } - return Sum; -} - -/// trySpillInterferences - Try to spill interfering registers instead of the -/// current one. Only do it if the accumulated spill weight is smaller than the -/// current spill weight. -unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl<LiveInterval*> &NewVRegs) { - NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled); - unsigned BestPhys = 0; - float BestWeight = 0; - - Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Weight = calcInterferenceWeight(VirtReg, PhysReg); - if (Weight == HUGE_VALF || Weight >= VirtReg.weight) - continue; - if (!BestPhys || Weight < BestWeight) - BestPhys = PhysReg, BestWeight = Weight; - } - - // No candidates found. - if (!BestPhys) - return 0; - - // Collect all interfering registers. - SmallVector<LiveInterval*, 8> Spills; - for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end()); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *VReg = Q.interferingVRegs()[i]; - unassign(*VReg, *AI); - } - } - - // Spill them all. - DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight " - << BestWeight << '\n'); - for (unsigned i = 0, e = Spills.size(); i != e; ++i) - spiller().spill(Spills[i], NewVRegs, Spills); - return BestPhys; -} - - -//===----------------------------------------------------------------------===// // Main Entry Point //===----------------------------------------------------------------------===// @@ -1305,12 +1344,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl<LiveInterval*> &NewVRegs) { // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) @@ -1321,25 +1355,29 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - if (Generation[VirtReg.reg] == 1) { + LiveRangeStage Stage = getStage(VirtReg); + if (Stage == RS_First) { + LRStage[VirtReg.reg] = RS_Second; + DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; } + assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; - // Try to spill another interfering reg with less spill weight. - PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs); - if (PhysReg) - return PhysReg; - // Finally spill VirtReg itself. NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - SmallVector<LiveInterval*, 1> pendingSpills; - spiller().spill(&VirtReg, NewVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, NewVRegs, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + + if (VerifyEnabled) + MF->verify(this, "After spilling"); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -1366,6 +1404,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis<SpillPlacement>(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + LRStage.clear(); + LRStage.resize(MRI->getNumVirtRegs()); + IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); allocatePhysRegs(); addMBBLiveIns(MF); @@ -1377,6 +1419,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { VRM->rewrite(Indexes); } + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index b959878bcdba..5ef88cb74ba5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveDebugVariables.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "Spiller.h" @@ -39,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> -#include <set> #include <queue> #include <memory> #include <cmath> @@ -66,6 +66,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt<bool> +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -109,6 +114,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr; @@ -179,6 +185,9 @@ namespace { SmallVector<unsigned, 4> RecentRegs; SmallVector<unsigned, 4>::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -226,8 +235,8 @@ namespace { // Determine if we skip this register due to its being recently used. bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -374,7 +383,7 @@ namespace { dbgs() << str << " intervals:\n"; for (; i != e; ++i) { - dbgs() << "\t" << *i->first << " -> "; + dbgs() << '\t' << *i->first << " -> "; unsigned reg = i->first->reg; if (TargetRegisterInfo::isVirtualRegister(reg)) @@ -389,7 +398,7 @@ namespace { } INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) @@ -400,7 +409,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -458,7 +467,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { const LiveRange &range = cur.ranges.front(); VNInfo *vni = range.valno; - if (vni->isUnused()) + if (vni->isUnused() || !vni->def.isValid()) return Reg; unsigned CandReg; @@ -571,7 +580,7 @@ void RALinScan::initIntervalSets() for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty()) { + if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { mri_->setPhysRegUsed(i->second->reg); fixed_.push_back(std::make_pair(i->second, i->second->begin())); } @@ -791,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector<float> &Weights, // register class we are trying to allocate. Then add the weight to all // sub-registers of the super-register even if they are not aliases. // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be choosen + // bl should get the same spill weight otherwise it will be chosen // as a spill candidate since spilling bh doesn't make ebx available. for (unsigned i = 0, e = Supers.size(); i != e; ++i) { for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) @@ -993,7 +1002,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused()) { + if (!vni->isUnused() && vni->def.isValid()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); if (CopyMI && CopyMI->isCopy()) { unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); @@ -1109,11 +1118,18 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // list. if (physReg) { DEBUG(dbgs() << tri_->getName(physReg) << '\n'); + assert(RC->contains(physReg) && "Invalid candidate"); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1229,8 +1245,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector<LiveInterval*, 8> spillIs, added; - spiller_->spill(cur, added, spillIs); + SmallVector<LiveInterval*, 8> added; + LiveRangeEdit LRE(*cur, added); + spiller_->spill(LRE); std::sort(added.begin(), added.end(), LISorter()); if (added.empty()) @@ -1306,7 +1323,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - spiller_->spill(sli, added, spillIs); + LiveRangeEdit LRE(*sli, added, 0, &spillIs); + spiller_->spill(LRE); spilled.insert(sli->reg); } @@ -1442,7 +1460,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. - if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1473,7 +1491,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1524,12 +1543,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index ea0d1fe0233f..1e1f1e0d3470 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -534,10 +534,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vregsToAlloc.erase(vreg); const LiveInterval* spillInterval = &lis->getInterval(vreg); double oldWeight = spillInterval->weight; - SmallVector<LiveInterval*, 8> spillIs; rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = - lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); + lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); addStackInterval(spillInterval, mri); rmf->rememberSpills(spillInterval, newSpills); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index a2580b85bcc3..ebfe533838d5 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -126,9 +126,10 @@ void RegScavenger::forward() { MBBI = MBB->begin(); Tracking = true; } else { - assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); MBBI = llvm::next(MBBI); } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); MachineInstr *MI = MBBI; @@ -241,12 +242,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { /// getRegsAvailable - Return all available registers in the register class /// in Mask. -void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC, - BitVector &Mask) { +BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { + BitVector Mask(TRI->getNumRegs()); for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) if (!isAliasUsed(*I)) Mask.set(*I); + return Mask; } /// findSurvivorReg - Return the candidate register that is unused for the @@ -335,9 +337,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Try to find a register that's unused if there is one, as then we won't - // have to spill. - if ((Candidates & RegsAvailable).any()) - Candidates &= RegsAvailable; + // have to spill. Search explicitly rather than masking out based on + // RegsAvailable, as RegsAvailable does not take aliases into account. + // That's what getRegsAvailable() is for. + BitVector Available = getRegsAvailable(RC); + + if ((Candidates & Available).any()) + Candidates &= Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index cbfd5a23d63d..c8de3823553c 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix", static cl::opt<std::string> machineFuncsToRender("rmf-funcs", - cl::desc("Coma seperated list of functions to render" + cl::desc("Comma separated list of functions to render" ", or \"*\"."), cl::init(""), cl::Hidden); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3388889c9e91..1302395f423e 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -472,7 +472,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } -/// AddPred - Updates the topological ordering to accomodate an edge +/// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; @@ -490,7 +490,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { } } -/// RemovePred - Updates the topological ordering to accomodate an +/// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f17023eabb72..67c209ea1977 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -371,7 +371,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // will be overlapped by work done outside the current // scheduling region. Latency -= std::min(Latency, Count); - // Add the artifical edge. + // Add the artificial edge. ExitSU.addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 027f6150e26b..4b55a2284f85 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -51,7 +51,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI) { + SUnitIterator EI, + const ScheduleDAG *Graph) { if (EI.isArtificialDep()) return "color=cyan,style=dashed"; if (EI.isCtrlDep()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9cc70a30927d..f42751167a45 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -319,6 +319,10 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorkList(N); } +void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { + ((DAGCombiner*)DC)->removeFromWorkList(N); +} + SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -1290,6 +1294,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } +/// isCarryMaterialization - Returns true if V is an ADDE node that is known to +/// return 0 or 1 depending on the carry flag. +static bool isCarryMaterialization(SDValue V) { + if (V.getOpcode() != ISD::ADDE) + return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0)); + return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1453,6 +1467,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), + N0.getOperand(2)); + + // add X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), + N1.getOperand(2)); + return SDValue(); } @@ -1496,6 +1522,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } + // addc (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, + DAG.getConstant(0, VT), N0.getOperand(2)); + + // addc X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, + DAG.getConstant(0, VT), N1.getOperand(2)); + return SDValue(); } @@ -1506,6 +1542,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + // If both operands are null we know that carry out will always be false. + if (N0C && N0C->isNullValue() && N0 == N1) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), + MVT::Glue)); + // canonicalize constant to RHS if (N0C && !N1C) return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), @@ -3281,8 +3323,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); } @@ -3688,7 +3732,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (load x)) -> (sext (truncate (sextload x))) // None of the supported targets knows how to perform load and sign extend - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { @@ -3839,7 +3884,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3892,7 +3937,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (load x)) -> (zext (truncate (zextload x))) // None of the supported targets knows how to perform load and vector_zext - // in one instruction. We only perform this transformation on scalar zext. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { @@ -4066,7 +4112,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4101,7 +4147,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { @@ -4514,7 +4561,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currenly we only perform this optimization on scalars because vectors + // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { SDValue Shorter = @@ -5101,7 +5148,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5123,7 +5172,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5817,8 +5868,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (LD->getExtensionType() == ISD::NON_EXTLOAD && - !LD->isVolatile()) { + if (ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast<StoreSDNode>(Chain); if (PrevST->getBasePtr() == Ptr && @@ -6217,6 +6267,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isNonTemporal(), OrigAlign); } + // Turn 'store undef, Ptr' -> nothing. + if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { // NOTE: If the original store is volatile, this transform must not increase @@ -6250,8 +6304,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); - } else if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 490b857b0e9c..3af948288daf 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -121,10 +122,9 @@ unsigned FastISel::getRegForValue(const Value *V) { // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) { - unsigned Reg = I->second; - return Reg; - } + if (I != FuncInfo.ValueMap.end()) + return I->second; + unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; @@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + if (CF->isNullValue()) { + Reg = TargetMaterializeFloatZero(CF); + } else { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + } if (!Reg) { // Try to emit the constant by using an integer constant with a cast. @@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { return false; } + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, - CI->getZExtValue()); - if (ResultReg != 0) { - // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); - return true; + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; } // Check if the second operand is a constant float. @@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) { } bool FastISel::SelectCall(const User *I) { - const Function *F = cast<CallInst>(I)->getCalledFunction(); + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getArgOperand(0))) { + // Don't attempt to handle constraints. + if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. - unsigned IID = F->getIntrinsicID(); - switch (IID) { + switch (F->getIntrinsicID()) { default: break; case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast<DbgDeclareInst>(I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) return true; @@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast<DbgValueInst>(I); + const DbgValueInst *DI = cast<DbgValueInst>(Call); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { @@ -523,65 +575,58 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { - default: break; - case TargetLowering::Expand: { - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(I, ResultReg); - return true; - } - } - break; + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) + break; + + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(Call, ResultReg); + return true; } case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { - default: break; - case TargetLowering::Expand: { - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) + break; + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(cast<CallInst>(I)); + FuncInfo.CatchInfoLost.insert(Call); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(I); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; + if (Reg) FuncInfo.MBB->addLiveIn(Reg); + } - UpdateValueMap(I, ResultReg); + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(Call); + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; - return true; - } - } - break; + UpdateValueMap(Call, ResultReg); + + return true; } } @@ -966,59 +1011,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { + // If this is a multiply by a power of two, emit this as a shift left. + if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { + Opcode = ISD::SHL; + Imm = Log2_64(Imm); + } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { + // div x, 8 -> srl x, 3 + Opcode = ISD::SRL; + Imm = Log2_64(Imm); + } + + // Horrible hack (to be removed), check to make sure shift amounts are + // in-range. + if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && + Imm >= VT.getSizeInBits()) + return 0; + // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) - return 0; - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); -} - -/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with a floating-point immediate operand using -/// FastEmit_rf. If that fails, it materializes the immediate into a register -/// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType) { - // First check if immediate type is legal. If not, we can't use the rf form. - unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); - if (ResultReg != 0) - return ResultReg; - - // Materialize the constant in a register. - unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); if (MaterialReg == 0) { - // If the target doesn't have a way to directly enter a floating-point - // value into a register, use an alternate approach. - // TODO: The current approach only supports floating-point constants - // that can be constructed by conversion from integer values. This should - // be replaced by code that creates a load from a constant-pool entry, - // which will require some target-specific work. - const APFloat &Flt = FPImm->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); - - uint64_t x[2]; - uint32_t IntBitWidth = IntVT.getSizeInBits(); - bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); - if (!isExact) - return 0; - APInt IntVal(IntBitWidth, 2, x); - - unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::Constant, IntVal.getZExtValue()); - if (IntegerReg == 0) - return 0; - MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); - if (MaterialReg == 0) - return 0; + // This is a bit ugly/slow, but failing here means falling out of + // fast-isel, which would be very slow. + const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1099,6 +1118,29 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -1160,6 +1202,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -1215,7 +1274,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't + // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 2ae3286829dd..d8a5770d36c0 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -448,16 +448,30 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to DestBB. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); + SmallPtrSet<const BasicBlock*, 4> Visited; + + // The 'eh.selector' call may not be in the direct successor of a basic block, + // but could be several successors deeper. If we don't find it, try going one + // level further. <rdar://problem/8824861> + while (Visited.insert(SuccBB)) { + for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { + // Apply the catch info to LPad. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); #ifndef NDEBUG - if (!FLI.MBBMap[SrcBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); + if (!FLI.MBBMap[SuccBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); #endif - } + return; + } + + const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); + if (Br && Br->isUnconditional()) + SuccBB = Br->getSuccessor(0); + else + break; + } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f08528fe2dc3..2b6c56eafd73 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -61,10 +61,10 @@ class SelectionDAGLegalize { // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; + SmallVector<SDValue, 8> LastCALLSEQ; enum LegalizeAction { Legal, // The target natively supports this operation. @@ -142,6 +142,9 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -153,6 +156,7 @@ private: RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -178,6 +182,15 @@ private: void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } + void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } + void pushLastCALLSEQ(SDValue s) { + LastCALLSEQ.push_back(s); + } + void popLastCALLSEQ() { + LastCALLSEQ.pop_back(); + } }; } @@ -223,7 +236,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); + pushLastCALLSEQ(DAG.getEntryNode()); // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,14 +264,15 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. + int next_depth = depth; if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) + next_depth = depth + 1; + if (Node->getOpcode() == ISD::CALLSEQ_END) { + assert(depth > 0 && "negative depth!"); + if (depth == 1) return Node; + else + next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -289,7 +303,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) return Result; } return 0; @@ -786,7 +800,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(); + return SDValue(0, 0); } /// LegalizeOp - We know that the specified value has a legal type, and @@ -934,11 +948,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -948,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: @@ -956,7 +972,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); break; } @@ -1024,8 +1041,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { - static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1042,9 +1059,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1065,29 +1082,28 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - - SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); + setLastCALLSEQ(SDValue(CallEnd, 0)); - depth++; // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - depth--; - assert(depth >= 0 && "Un-matched CALLSEQ_START?"); - if (depth > 0) - LastCALLSEQ_END = Saved_LastCALLSEQ_END; + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1116,6 +1132,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } } // This finishes up call legalization. + popLastCALLSEQ(); + // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -2035,9 +2053,43 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return DAG.getRoot(); // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair<SDValue,SDValue> CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2072,7 +2124,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2112,6 +2164,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } +/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != 0; +} + +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { + unsigned OtherOpcode = 0; + if (isSigned) + OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; + else + OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + if (User->getOpcode() == OtherOpcode && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) + return true; + } + return false; +} + +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the return address of the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + DebugLoc dl = Node->getDebugLoc(); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + + // Remainder is loaded back from the stack frame. + SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, + MachinePointerInfo(), false, false, 0); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -2759,7 +2918,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3085,24 +3244,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { + } else if (isSigned) Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { + else Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128); - } Results.push_back(Tmp1); break; } @@ -3112,7 +3272,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) @@ -3141,6 +3303,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3225,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3242,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3255,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3266,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3409,7 +3587,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f0752df80f12..935aab0e59af 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1051,8 +1051,6 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; - case ISD::UMULO: - case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1428,9 +1426,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } - return; + return; } - + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); @@ -2128,31 +2126,6 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } -void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(Ret, Lo, Hi); - - // Now calculate overflow. - SDValue Ofl; - if (N->getOpcode() == ISD::UMULO) - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, - DAG.getConstant(0, VT), ISD::SETNE); - else { - SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); - Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); - } - ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 3f81bbbe4061..5409b88efaba 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -348,7 +348,6 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -523,6 +522,7 @@ private: SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -566,7 +566,6 @@ private: void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 167dbe0377b3..5d0f923afb0f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -58,6 +58,9 @@ class VectorLegalizer { SDValue UnrollVSETCC(SDValue Op); // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB // isn't legal. + // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + // SINT_TO_FLOAT and SHR on vectors isn't legal. + SDValue ExpandUINT_TO_FLOAT(SDValue Op); SDValue ExpandFNEG(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::FNEG) + if (Node->getOpcode() == ISD::UINT_TO_FP) + Result = ExpandUINT_TO_FLOAT(Op); + else if (Node->getOpcode() == ISD::FNEG) Result = ExpandFNEG(Op); else if (Node->getOpcode() == ISD::VSETCC) Result = UnrollVSETCC(Op); @@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { + + + EVT VT = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // Make sure that the SINT_TO_FP and SRL instructions are available. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + EVT SVT = VT.getScalarType(); + assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + + unsigned BW = SVT.getSizeInBits(); + SDValue HalfWord = DAG.getConstant(BW/2, VT); + + // Constants to clear the upper part of the word. + // Notice that we can also use SHL+SHR, but using a constant is slightly + // faster on x86. + uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + + // Two to the power of half-word-size. + SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); + + // Clear upper part of LO, lower HI + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + + // Convert hi and lo to floats + // Convert the hi part back to the upper values + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + + // Add the two halves + return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); +} + + SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 182f8fcbfbf3..0b4dd357c39d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -63,27 +64,33 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + case ISD::ANY_EXTEND: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FRINT: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::FFLOOR: - case ISD::FCEIL: - case ISD::FRINT: - case ISD::FNEARBYINT: - case ISD::UINT_TO_FP: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: R = ScalarizeVecRes_UnaryOp(N); break; @@ -145,6 +152,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { N->getOperand(0), N->getOperand(1)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + NewVT, Op, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), @@ -405,11 +419,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; - case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; @@ -427,32 +439,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::CTTZ: + case ISD::ANY_EXTEND: + case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTPOP: - case ISD::FNEG: + case ISD::CTTZ: case ISD::FABS: - case ISD::FSQRT: - case ISD::FSIN: + case ISD::FCEIL: case ISD::FCOS: - case ISD::FTRUNC: + case ISD::FEXP: + case ISD::FEXP2: case ISD::FFLOOR: - case ISD::FCEIL: - case ISD::FRINT: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - case ISD::FEXP: - case ISD::FEXP2: - case ISD::FLOG: - case ISD::FLOG2: - case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -587,60 +602,6 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); } -void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); - - // Split the input. - SDValue VLo, VHi; - EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case Legal: { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case SplitVector: - GetSplitVector(N->getOperand(0), VLo, VHi); - break; - case WidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - } - - SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); - SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); - - Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); -} - void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -840,8 +801,25 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } } void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, @@ -989,11 +967,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FP_EXTEND: case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: @@ -1270,15 +1248,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Shift(N); break; + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecRes_Convert(N); break; @@ -1286,15 +1265,20 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: - case ISD::FNEG: - case ISD::FSIN: - case ISD::FSQRT: case ISD::FEXP: case ISD::FEXP2: + case ISD::FFLOOR: case ISD::FLOG: - case ISD::FLOG2: case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; } @@ -2004,7 +1988,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; - case ISD::FP_ROUND: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index e3da2084529a..7b560d173ed3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -570,13 +570,20 @@ void ScheduleDAGFast::ListScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be + // possible copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's + // possible to copy the value but it require cross register class copies + // and it is expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical " + "register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 0b548b277f4c..88bd4509b468 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -70,6 +70,50 @@ static cl::opt<bool> DisableSchedCycles( "disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling")); +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. +static cl::opt<bool> DisableSchedRegPressure( + "disable-sched-reg-pressure", cl::Hidden, cl::init(false), + cl::desc("Disable regpressure priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedLiveUses( + "disable-sched-live-uses", cl::Hidden, cl::init(true), + cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedVRegCycle( + "disable-sched-vrcycle", cl::Hidden, cl::init(false), + cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); +static cl::opt<bool> DisableSchedStalls( + "disable-sched-stalls", cl::Hidden, cl::init(true), + cl::desc("Disable no-stall priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedCriticalPath( + "disable-sched-critical-path", cl::Hidden, cl::init(false), + cl::desc("Disable critical path priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedHeight( + "disable-sched-height", cl::Hidden, cl::init(false), + cl::desc("Disable scheduled-height priority in sched=list-ilp")); + +static cl::opt<int> MaxReorderWindow( + "max-sched-reorder", cl::Hidden, cl::init(6), + cl::desc("Number of instructions to allow ahead of the critical path " + "in sched=list-ilp")); + +static cl::opt<unsigned> AvgIPC( + "sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle whan no target itinerary exists.")); + +#ifndef NDEBUG +namespace { + // For sched=list-ilp, Count the number of times each factor comes into play. + enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; +} +static const char *FactorName[NumFactors] = +{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; +static int FactorCount[NumFactors]; +#endif //!NDEBUG + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -103,6 +147,10 @@ private: /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; + /// IssueCount - Count instructions issued in this cycle + /// Currently valid only for bottom-up scheduling. + unsigned IssueCount; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -234,8 +282,14 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + FactorCount[i] = 0; + } +#endif //!NDEBUG CurCycle = 0; + IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -258,6 +312,11 @@ void ScheduleDAGRRList::Schedule() { else ListScheduleTopDown(); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); + } +#endif // !NDEBUG AvailableQueue->releaseState(); } @@ -295,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { if (Height < MinAvailableCycle) MinAvailableCycle = Height; - if (isReady(SU)) { + if (isReady(PredSU)) { AvailableQueue->push(PredSU); } // CapturePred and others may have left the node in the pending queue, avoid @@ -383,6 +442,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { if (NextCycle <= CurCycle) return; + IssueCount = 0; AvailableQueue->setCurCycle(NextCycle); if (!HazardRec->isEnabled()) { // Bypass lots of virtual calls in case of long latency. @@ -407,6 +467,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -477,6 +544,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. @@ -486,12 +555,13 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the - // node it's ready cycle can aid heuristics, and after scheduling it can + // node its ready cycle can aid heuristics, and after scheduling it can // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); @@ -502,6 +572,12 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); + // If HazardRec is disabled, and each inst counts as one cycle, then + // advance CurCycle before ReleasePredecessors to avoid useless pushes to + // PendingQueue for schedulers that implement HasReadyFilter. + if (!HazardRec->isEnabled() && AvgIPC < 2) + AdvanceToCycle(CurCycle + 1); + // Update liveness of predecessors before successors to avoid treating a // two-address node as a live range def. ReleasePredecessors(SU); @@ -518,16 +594,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, count each inst as one cycle. - if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. + // + // Check AvailableQueue after ReleasePredecessors in case of zero latency. + if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -872,6 +957,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, AddPred(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, *I)); } + else { + // Avoid scheduling the def-side copy before other successors. Otherwise + // we could introduce another physreg interference on the copy and + // continue inserting copies indefinitely. + SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true); + AddPred(SuccSU, D); + } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); @@ -1077,13 +1171,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be possible + // copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's possible to + // copy the value but it require cross register class copies and it is + // expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; @@ -1139,7 +1239,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1318,7 +1418,7 @@ struct src_ls_rr_sort : public queue_sort { struct hybrid_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1337,7 +1437,7 @@ struct hybrid_ls_rr_sort : public queue_sort { struct ilp_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1395,7 +1495,7 @@ public: std::fill(RegPressure.begin(), RegPressure.end(), 0); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); } } @@ -1422,6 +1522,8 @@ public: unsigned getNodePriority(const SUnit *SU) const; unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + return scheduleDAG->DAG->GetOrdering(SU->getNode()); } @@ -1450,7 +1552,9 @@ public: bool HighRegPressure(const SUnit *SU) const; - bool MayReduceRegPressure(SUnit *SU); + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; void ScheduledNode(SUnit *SU); @@ -1538,6 +1642,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1576,17 +1694,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1625,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1670,7 +1787,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { return false; } -bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { const SDNode *N = SU->getNode(); if (!N->isMachineOpcode() || !SU->NumSuccs) @@ -1688,10 +1805,60 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { return false; } +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { + LiveUses = 0; + int PDiff = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + if (PredSU->getNode()->isMachineOpcode()) + ++LiveUses; + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + ++PDiff; + } + } + const SDNode *N = SU->getNode(); + + if (!N || !N->isMachineOpcode() || !SU->NumSuccs) + return PDiff; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + --PDiff; + } + return PDiff; +} + void RegReductionPQBase::ScheduledNode(SUnit *SU) { if (!TracksRegPressure) return; + if (!SU->getNode()) + return; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) @@ -1758,6 +1925,8 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { return; const SDNode *N = SU->getNode(); + if (!N) return; + if (!N->isMachineOpcode()) { if (N->getOpcode() != ISD::CopyToReg) return; @@ -1871,7 +2040,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -1893,20 +2084,67 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet<const SUnit*, 4> Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; + } +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". + if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; + } } return false; } @@ -1926,23 +2164,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -1953,45 +2180,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then - // it's height is already covered so only its depth matters. We also reach + // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); - if (LPriority != RPriority) + + // Be really careful about hoisting call operands above previous calls. + // Only allows it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + + if (LPriority != RPriority) { + DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; + } + + // One or both of the nodes are calls and their sethi-ullman numbers are the + // same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2012,40 +2296,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } + + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); - if (!DisableSchedCycles) { + // Do not compare latencies when one or both of the nodes are calls. + if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2077,6 +2383,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2086,16 +2395,18 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - else if (!LHigh && !RHigh) { + if (!LHigh && !RHigh) { int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); if (result != 0) return result > 0; @@ -2112,34 +2423,118 @@ bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { != ScheduleHazardRecognizer::NoHazard) return false; - return SU->getHeight() <= CurCycle; + return true; } +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); - bool LHigh = SPQ->HighRegPressure(left); - bool RHigh = SPQ->HighRegPressure(right); - // Avoid causing spills. If register pressure is high, schedule for - // register pressure reduction. - if (LHigh && !RHigh) - return true; - else if (!LHigh && RHigh) - return false; - else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule to maximize instruction level - // parallelism. - if (left->NumPreds > right->NumPreds) - return false; - else if (left->NumPreds < right->NumPreds) - return false; + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(++FactorCount[FactPressureDiff]); + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + DEBUG(++FactorCount[FactRegUses]); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum << "): " + << right->getDepth() << "\n"); + DEBUG(++FactorCount[FactDepth]); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } } return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2417,6 +2812,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 477c1ffe65d3..9f2f0121a86d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,12 +27,21 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); +// This allows latency based scheduler to notice high latency instructions +// without a target itinerary. The choise if number here has more to do with +// balancing scheduler heursitics than with the actual machine latency. +static cl::opt<int> HighLatencyCycles( + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency'" + "instructions take for targets with no itinerary")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getTarget().getInstrItineraryData()) {} @@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector<SUnit*, 8> CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } @@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Initialize NumNodeDefs for the current Node's opcode. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + if (!Node->isMachineOpcode()) { if (Node->getOpcode() == ISD::CopyFromReg) NodeNumDefs = 1; @@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SU->Latency = 1; + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; return; } @@ -573,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index cc7310e4ca42..b5f68f3055cf 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -80,6 +80,12 @@ namespace llvm { /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is + /// CopyToReg and its only active data operands are CopyFromReg within a + /// single block loop. + /// + void InitVRegCycleFlag(SUnit *SU); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9120288921e2..c2711c8097d0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1418,9 +1418,9 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. -SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { +SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(OpTy); + MVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2482,6 +2482,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. + return getConstant(0, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2496,6 +2499,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. + return getConstant(0, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2512,6 +2518,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); // (ext (trunx x)) -> x if (OpOpcode == ISD::TRUNCATE) { @@ -5904,7 +5912,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bit_convert"; + case ISD::BITCAST: return "bitcast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -6226,6 +6234,9 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, return; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; OS << '\n'; printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); } @@ -6238,7 +6249,7 @@ void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. - printrWithDepth(OS, G, 100); + printrWithDepth(OS, G, 10); } void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { @@ -6247,7 +6258,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. - dumprWithDepth(G, 100); + dumprWithDepth(G, 10); } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { @@ -6311,7 +6322,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 48d9bbb5132e..b02a7b66c496 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -50,7 +50,6 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision", // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer -static cl::opt<unsigned> -MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), - cl::init(64), cl::Hidden); +static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, @@ -1130,15 +1127,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; - // FIXME: C calling convention requires the return type to be promoted - // to at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); @@ -1153,9 +1143,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setInReg(); // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) + if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { @@ -2029,9 +2019,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. + volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -4039,10 +4033,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. @@ -4413,7 +4403,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); + getRoot())); return 0; } @@ -4682,9 +4672,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4937,15 +4940,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size())); - } - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. - if (Result.second.getNode()) - DAG.setRoot(Result.second); - else + // Assign order to nodes here. If the call does not produce a result, it won't + // be mapped to a SDNode and visit() will not assign it an order number. + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. HasTailCall = true; + ++SDNodeOrder; + AssignOrderingToNode(DAG.getRoot().getNode()); + } else { + DAG.setRoot(Result.second); + ++SDNodeOrder; + AssignOrderingToNode(Result.second.getNode()); + } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This @@ -5211,12 +5220,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -namespace llvm { +namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. -class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : - public TargetLowering::AsmOperandInfo { +class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. @@ -5304,7 +5312,7 @@ private: typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; -} // end llvm namespace. +} // end anonymous namespace /// isAllocatableRegister - If the specified register is safe to allocate, /// i.e. it isn't a stack pointer or some other special register, return the @@ -5363,11 +5371,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// -void SelectionDAGBuilder:: -GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { - LLVMContext &Context = FuncInfo.Fn->getContext(); +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + DebugLoc DL, + SDISelAsmOperandInfo &OpInfo, + std::set<unsigned> &OutputRegs, + std::set<unsigned> &InputRegs) { + LLVMContext &Context = *DAG.getContext(); // Compute whether this value requires an input register, an output register, // or both. @@ -5413,7 +5423,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5423,7 +5433,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5694,7 +5704,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5705,7 +5716,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6181,7 +6193,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6397,7 +6409,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->setValue(I, Res); // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + // wherever we got it to the vreg that other BB's will reference it as. SDB->CopyToExportRegsIfNeeded(I); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8f466d913bbb..a689b76cdc88 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include <vector> -#include <set> namespace llvm { @@ -60,7 +59,6 @@ class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; -class SDISelAsmOperandInfo; class SDDbgValue; class SExtInst; class SelectInst; @@ -380,10 +378,6 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - - void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs); void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68ba966d268a..fdf3767d8c65 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -421,10 +421,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +437,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -489,13 +487,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; std::string BlockName; + int BlockNumber = -1; +#ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) +#endif + { + BlockNumber = FuncInfo->MBB->getNumber(); BlockName = MF->getFunction()->getNameStr() + ":" + FuncInfo->MBB->getBasicBlock()->getNameStr(); - - DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); + } + DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -505,7 +509,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -518,7 +523,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) @@ -531,8 +537,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); } { @@ -556,8 +562,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" + << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); @@ -567,7 +573,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -577,7 +584,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -591,7 +599,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -632,7 +641,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(errs() << "===== Instruction selection begins: BB#" + << FuncInfo->MBB->getNumber() + << " '" << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -735,16 +746,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -764,7 +808,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, "The only use of the vreg must be a use, we haven't emitted the def!"); MachineInstr *User = &*RI; - + // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes, make // sure they get inserted in a logical place before the new instruction. @@ -817,6 +861,17 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) { } #endif +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -843,15 +898,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa<PHINode>(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa<PHINode>(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -899,10 +952,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa<TerminatorInst>(Inst) && - !isa<DbgInfoIntrinsic>(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -911,16 +961,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa<LoadInst>(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -974,11 +1028,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); @@ -2392,6 +2448,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + continue; + } case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 76eb9453561e..cd1647b17b9b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -90,7 +90,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. template<typename EdgeIter> - static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); if (VT == MVT::Glue) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35b847ccabfb..15606af787f8 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -93,6 +93,19 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UREM_I32] = "__umodsi3"; Names[RTLIB::UREM_I64] = "__umoddi3"; Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. + Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; Names[RTLIB::ADD_F32] = "__addsf3"; @@ -1665,6 +1678,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1678,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1829,7 +1849,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1840,12 +1859,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + if (isa<ConstantSDNode>(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1898,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. } + // (zext x) == C --> x == (trunc C) + if (DCI.isBeforeLegalize() && N0->hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreZExt; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreZExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreZExt = N0->getOperand(0); + } + } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreZExt = N0; + } + } + + // Make sure we're not loosing bits from the constant. + if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. @@ -1936,7 +1990,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } if (bestWidth) { - EVT newVT = EVT::getIntegerVT(Context, bestWidth); + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound()) { EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); @@ -3174,26 +3228,39 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1)); - APInt::mu magics = N1C->getAPIntValue().magicu(); + const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); + APInt::mu magics = N1C.magicu(); + + SDValue Q = N->getOperand(0); + + // If the divisor is even, we can avoid using the expensive fixup by shifting + // the divided value upfront. + if (magics.a != 0 && !N1C[0]) { + unsigned Shift = N1C.countTrailingZeros(); + Q = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + if (Created) + Created->push_back(Q.getNode()); + + // Get magic number for the shifted divisor. + magics = N1C.lshr(Shift).magicu(Shift); + assert(magics.a == 0 && "Should use cheap fixup now"); + } // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - SDValue Q; if (isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) - Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), - N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, + DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent if (Created) Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C->getAPIntValue().getBitWidth() && + assert(magics.s < N1C.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 7b5bca495206..160f38f69236 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. bool changed = true; unsigned iterations = 0; while (changed) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2843c1a5b6d8..35b8e14ddc61 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -104,6 +104,18 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as <undef> so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, @@ -196,15 +208,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, if (ValLR+1 != BLR) return false; // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && - *tri_->getSubRegisters(IntB.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); }); return false; } @@ -471,7 +482,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - JoinedCopies.insert(UseMI); + markAsJoined(UseMI); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition @@ -901,6 +912,58 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. + if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (DisablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to allocatable physreg, we don't want to risk modifying + // reserved registers. + if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. @@ -973,27 +1036,25 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } - if (DisablePhysicalJoin && CP.isPhys()) { - DEBUG(dbgs() << "\tPhysical joins disabled.\n"); - return false; - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); - // Only coalesce to allocatable physreg. - if (!li_->isAllocatable(CP.getDstReg())) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. + if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; } } else { - DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << " to " << CP.getNewRC()->getName() << "\n"); - // Avoid constraining virtual register regclass too much. if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); if (DisableCrossClassJoin) { DEBUG(dbgs() << "\tCross-class joins disabled.\n"); return false; @@ -1002,8 +1063,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->getRegClass(CP.getSrcReg()), mri_->getRegClass(CP.getDstReg()), CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << CP.getNewRC()->getName() << ".\n"); + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1015,45 +1075,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { CP.flip(); } - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial() && CP.isPhys()) { - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } - - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) - return true; - - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - // Okay, attempt to join these two intervals. On failure, this returns false. // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have @@ -1072,7 +1093,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!CP.isPartial()) { if (AdjustCopiesBackFrom(CP, CopyMI) || RemoveCopyByCommutingDef(CP, CopyMI)) { - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1092,7 +1113,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } // Remember to delete the copy instruction. - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); UpdateRegDefsUses(CP); @@ -1568,9 +1589,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, if (UseMI->isIdentityCopy()) continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); - // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something - // that compares higher than any other interval. - if (Idx >= Start && Idx < End && Idx >= UseIdx) { + if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) { LastUse = &Use; UseIdx = Idx.getUseIndex(); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 56703dfa2ddd..65cf542836dd 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -68,16 +68,6 @@ namespace llvm { initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); } - struct InstrSlots { - enum { - LOAD = 0, - USE = 1, - DEF = 2, - STORE = 3, - NUM = 4 - }; - }; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual void releaseMemory(); @@ -148,6 +138,9 @@ namespace llvm { unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI); + /// shouldJoinPhys - Return true if a physreg copy should be joined. + bool shouldJoinPhys(CoalescerPair &CP); + /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool isWinToJoinCrossClass(unsigned SrcReg, @@ -186,6 +179,9 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; + + /// markAsJoined - Remember that CopyMI has already been joined. + void markAsJoined(MachineInstr *CopyMI); }; } // End llvm namespace diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 13e1454fa5f3..43904a76cf13 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -442,25 +442,18 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { BasicBlock *DispatchBlock = BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - // Add a call to dispatch_setup at the start of the dispatch block. This is - // expanded to any target-specific setup that needs to be done. - Value *SetupArg = - CastInst::Create(Instruction::BitCast, FunctionContext, - Type::getInt8PtrTy(F.getContext()), "", - DispatchBlock); - CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock); - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we go to a block that just does an unwind (which is the - // correct action for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + // value. By default, we issue a trap statement. + BasicBlock *TrapBlock = + BasicBlock::Create(F.getContext(), "trapbb", &F); + CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), + "", TrapBlock); + new UnreachableInst(F.getContext(), TrapBlock); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), @@ -524,6 +517,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); + + // Add a call to dispatch_setup after the setjmp call. This is expanded to any + // target-specific setup that needs to be done. + CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), ICmpInst::ICMP_EQ, DispatchVal, Zero, @@ -564,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); + BranchInst::Create(TrapBlock, Unwinds[i]); Unwinds[i]->eraseFromParent(); } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6e3fa90e4341..ca79cafcf4be 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,47 +10,20 @@ #define DEBUG_TYPE "slotindexes" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; - -// Yep - these are thread safe. See the header for details. -namespace { - - - class EmptyIndexListEntry : public IndexListEntry { - public: - EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {} - }; - - class TombstoneIndexListEntry : public IndexListEntry { - public: - TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {} - }; - - // The following statics are thread safe. They're read only, and you - // can't step from them to any other list entries. - ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey; - ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey; -} - char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", "Slot index numbering", false, false) -IndexListEntry* IndexListEntry::getEmptyKeyEntry() { - return &*IndexListEntryEmptyKey; -} - -IndexListEntry* IndexListEntry::getTombstoneKeyEntry() { - return &*IndexListEntryTombstoneKey; -} - +STATISTIC(NumLocalRenum, "Number of local renumberings"); +STATISTIC(NumGlobalRenum, "Number of global renumberings"); void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); @@ -59,7 +32,7 @@ void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { void SlotIndexes::releaseMemory() { mi2iMap.clear(); - mbb2IdxMap.clear(); + MBBRanges.clear(); idx2MBBMap.clear(); clearList(); } @@ -85,13 +58,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { "Index list non-empty at initial numbering?"); assert(idx2MBBMap.empty() && "Index -> MBB mapping non-empty at initial numbering?"); - assert(mbb2IdxMap.empty() && + assert(MBBRanges.empty() && "MBB -> Index mapping non-empty at initial numbering?"); assert(mi2iMap.empty() && "MachineInstr -> Index mapping non-empty at initial numbering?"); functionSize = 0; unsigned index = 0; + MBBRanges.resize(mf->getNumBlockIDs()); + idx2MBBMap.reserve(mf->size()); push_back(createEntry(0, index)); @@ -103,8 +78,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // Insert an index for the MBB start. SlotIndex blockStartIndex(back(), SlotIndex::LOAD); - index += SlotIndex::NUM; - for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { MachineInstr *mi = miItr; @@ -112,32 +85,19 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { continue; // Insert a store index for the instr. - push_back(createEntry(mi, index)); + push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert( - std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); ++functionSize; - - unsigned Slots = mi->getDesc().getNumDefs(); - if (Slots == 0) - Slots = 1; - - index += (Slots + 1) * SlotIndex::NUM; } - // We insert two blank instructions between basic blocks. - // One to represent live-out registers and one to represent live-ins. - push_back(createEntry(0, index)); - index += SlotIndex::NUM; - - push_back(createEntry(0, index)); - - SlotIndex blockEndIndex(back(), SlotIndex::LOAD); - mbb2IdxMap.insert( - std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex))); + // We insert one blank instructions between basic blocks. + push_back(createEntry(0, index += SlotIndex::InstrDist)); + MBBRanges[mbb->getNumber()].first = blockStartIndex; + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -151,38 +111,41 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } void SlotIndexes::renumberIndexes() { - // Renumber updates the index of every element of the index list. - // If all instrs in the function have been allocated an index (which has been - // placed in the index list in the order of instruction iteration) then the - // resulting numbering will match what would have been generated by the - // pass during the initial numbering of the function if the new instructions - // had been present. DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); + ++NumGlobalRenum; - functionSize = 0; unsigned index = 0; for (IndexListEntry *curEntry = front(); curEntry != getTail(); curEntry = curEntry->getNext()) { - curEntry->setIndex(index); - - if (curEntry->getInstr() == 0) { - // MBB start entry. Just step index by 1. - index += SlotIndex::NUM; - } - else { - ++functionSize; - unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); - if (Slots == 0) - Slots = 1; - - index += (Slots + 1) * SlotIndex::NUM; - } + index += SlotIndex::InstrDist; } } +// Renumber indexes locally after curEntry was inserted, but failed to get a new +// index. +void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) { + // Number indexes with half the default spacing so we can catch up quickly. + const unsigned Space = SlotIndex::InstrDist/2; + assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + + IndexListEntry *start = curEntry->getPrev(); + unsigned index = start->getIndex(); + IndexListEntry *tail = getTail(); + do { + curEntry->setIndex(index += Space); + curEntry = curEntry->getNext(); + // If the next index is bigger, we have caught up. + } while (curEntry != tail && curEntry->getIndex() <= index); + + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-' + << index << " ***\n"); + ++NumLocalRenum; +} + + void SlotIndexes::dump() const { for (const IndexListEntry *itr = front(); itr != getTail(); itr = itr->getNext()) { @@ -195,11 +158,9 @@ void SlotIndexes::dump() const { } } - for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); - itr != mbb2IdxMap.end(); ++itr) { - dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" - << itr->second.first << ", " << itr->second.second << "]\n"; - } + for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) + dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; } // Print a SlotIndex to a raw_ostream. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 9c0bf1629a14..694961863261 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -67,11 +67,11 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. /// struct SpillPlacement::Node { - /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle. + /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. /// Ideally, these two numbers should be identical, but inaccuracies in the /// block frequency estimates means that we need to normalize ingoing and /// outgoing frequencies separately so they are commensurate. - float Frequency[2]; + float Scale[2]; /// Bias - Normalized contributions from non-transparent blocks. /// A bundle connected to a MustSpill block has a huge negative bias, @@ -107,7 +107,7 @@ struct SpillPlacement::Node { /// Node - Create a blank Node. Node() { - Frequency[0] = Frequency[1] = 0; + Scale[0] = Scale[1] = 0; } /// clear - Reset per-query data, but preserve frequencies that only depend on @@ -121,7 +121,7 @@ struct SpillPlacement::Node { /// out=0 for an ingoing link, and 1 for an outgoing link. void addLink(unsigned b, float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; // There can be multiple links to the same bundle, add them up. for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) @@ -134,9 +134,10 @@ struct SpillPlacement::Node { } /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. + /// Return the change to the total number of positive biases. void addBias(float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; Bias += w; } @@ -175,13 +176,22 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. + BlockFrequency.resize(mf.getNumBlockIDs()); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = getBlockFrequency(I); + float Freq = LiveIntervals::getSpillWeight(true, false, + loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq; + BlockFrequency[Num] = Freq; + nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; } + // Scales are reciprocal frequencies. + for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) + for (unsigned d = 0; d != 2; ++d) + if (nodes[i].Scale[d] > 0) + nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; + // We never change the function. return false; } @@ -200,31 +210,12 @@ void SpillPlacement::activate(unsigned n) { } -/// prepareNodes - Compute node biases and weights from a set of constraints. +/// addConstraints - Compute node biases and weights from a set of constraints. /// Set a bit in NodeMask for each active node. -void SpillPlacement:: -prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { - for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(), +void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { + for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number); - float Freq = getBlockFrequency(MBB); - - // Is this a transparent block? Link ingoing and outgoing bundles. - if (I->Entry == DontCare && I->Exit == DontCare) { - unsigned ib = bundles->getBundle(I->Number, 0); - unsigned ob = bundles->getBundle(I->Number, 1); - - // Ignore self-loops. - if (ib == ob) - continue; - activate(ib); - activate(ob); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); - continue; - } - - // This block is not transparent, but it can still add bias. + float Freq = getBlockFrequency(I->Number); const float Bias[] = { 0, // DontCare, 1, // PrefReg, @@ -248,10 +239,54 @@ prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { } } +void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { + for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; + ++I) { + unsigned Number = *I; + unsigned ib = bundles->getBundle(Number, 0); + unsigned ob = bundles->getBundle(Number, 1); + + // Ignore self-loops. + if (ib == ob) + continue; + activate(ib); + activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); + float Freq = getBlockFrequency(Number); + nodes[ib].addLink(ob, Freq, 1); + nodes[ob].addLink(ib, Freq, 0); + } +} + +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + /// iterate - Repeatedly update the Hopfield nodes until stability or the /// maximum number of iterations is reached. /// @param Linked - Numbers of linked nodes that need updating. -void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. + while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes); + if (Linked.empty()) return; @@ -267,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_reverse_iterator I = llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; // Scan forwards, skipping the first node which was just updated. @@ -278,53 +316,37 @@ void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { for (SmallVectorImpl<unsigned>::const_iterator I = llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; } } -bool -SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, - BitVector &RegBundles) { +void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); // Reuse RegBundles as our ActiveNodes vector. ActiveNodes = &RegBundles; ActiveNodes->clear(); ActiveNodes->resize(bundles->getNumBundles()); +} - // Compute active nodes, links and biases. - prepareNodes(LiveBlocks); - - // Update all active nodes, and find the ones that are actually linked to - // something so their value may change when iterating. - SmallVector<unsigned, 8> Linked; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) { - nodes[n].update(nodes); - // A node that must spill, or a node without any links is not going to - // change its value ever again, so exclude it from iterations. - if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) - Linked.push_back(n); - } - - // Iterate the network to convergence. - iterate(Linked); +bool +SpillPlacement::finish() { + assert(ActiveNodes && "Call prepare() first"); - // Write preferences back to RegBundles. + // Write preferences back to ActiveNodes. bool Perfect = true; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) if (!nodes[n].preferReg()) { - RegBundles.reset(n); + ActiveNodes->reset(n); Perfect = false; } + ActiveNodes = 0; return Perfect; } - -/// getBlockFrequency - Return our best estimate of the block frequency which is -/// the expected number of block executions per function invocation. -float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) { - // Use the unnormalized spill weight for real block frequencies. - return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB)); -} - diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index ef2d516cdce7..6952ad800965 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -10,8 +10,8 @@ // This analysis computes the optimal spill code placement between basic blocks. // // The runOnMachineFunction() method only precomputes some profiling information -// about the CFG. The real work is done by placeSpills() which is called by the -// register allocator. +// about the CFG. The real work is done by prepare(), addConstraints(), and +// finish() which are called by the register allocator. // // Given a variable that is live across multiple basic blocks, and given // constraints on the basic blocks where the variable is live, determine which @@ -27,6 +27,8 @@ #ifndef LLVM_CODEGEN_SPILLPLACEMENT_H #define LLVM_CODEGEN_SPILLPLACEMENT_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -35,7 +37,6 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; -template <typename> class SmallVectorImpl; class SpillPlacement : public MachineFunctionPass { struct Node; @@ -44,10 +45,20 @@ class SpillPlacement : public MachineFunctionPass { const MachineLoopInfo *loops; Node *nodes; - // Nodes that are active in the current computation. Owned by the placeSpills + // Nodes that are active in the current computation. Owned by the prepare() // caller. BitVector *ActiveNodes; + // Nodes with active links. Populated by scanActiveBundles. + SmallVector<unsigned, 8> Linked; + + // Nodes that went positive during the last call to scanActiveBundles or + // iterate. + SmallVector<unsigned, 8> RecentPositive; + + // Block frequencies are computed once. Indexed by block number. + SmallVector<float, 4> BlockFrequency; + public: static char ID; // Pass identification, replacement for typeid. @@ -70,28 +81,53 @@ public: BorderConstraint Exit : 8; ///< Constraint on block exit. }; - /// placeSpills - Compute the optimal spill code placement given the - /// constraints. No MustSpill constraints will be violated, and the smallest - /// possible number of PrefX constraints will be violated, weighted by - /// expected execution frequencies. - /// @param LiveBlocks Constraints for blocks that have the variable live in or - /// live out. DontCare/DontCare means the variable is live - /// through the block. DontCare/X means the variable is live - /// out, but not live in. + /// prepare - Reset state and prepare for a new spill placement computation. /// @param RegBundles Bit vector to receive the edge bundles where the /// variable should be kept in a register. Each bit /// corresponds to an edge bundle, a set bit means the /// variable should be kept in a register through the /// bundle. A clear bit means the variable should be - /// spilled. + /// spilled. This vector is retained. + void prepare(BitVector &RegBundles); + + /// addConstraints - Add constraints and biases. This method may be called + /// more than once to accumulate constraints. + /// @param LiveBlocks Constraints for blocks that have the variable live in or + /// live out. + void addConstraints(ArrayRef<BlockConstraint> LiveBlocks); + + /// addLinks - Add transparent blocks with the given numbers. + void addLinks(ArrayRef<unsigned> Links); + + /// scanActiveBundles - Perform an initial scan of all bundles activated by + /// addConstraints and addLinks, updating their state. Add all the bundles + /// that now prefer a register to RecentPositive. + /// Prepare internal data structures for iterate. + /// Return true is there are any positive nodes. + bool scanActiveBundles(); + + /// iterate - Update the network iteratively until convergence, or new bundles + /// are found. + void iterate(); + + /// getRecentPositive - Return an array of bundles that became positive during + /// the previous call to scanActiveBundles or iterate. + ArrayRef<unsigned> getRecentPositive() { return RecentPositive; } + + /// finish - Compute the optimal spill code placement given the + /// constraints. No MustSpill constraints will be violated, and the smallest + /// possible number of PrefX constraints will be violated, weighted by + /// expected execution frequencies. + /// The selected bundles are returned in the bitvector passed to prepare(). /// @return True if a perfect solution was found, allowing the variable to be /// in a register through all relevant bundles. - bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, - BitVector &RegBundles); + bool finish(); /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. - float getBlockFrequency(const MachineBasicBlock*); + float getBlockFrequency(unsigned Number) const { + return BlockFrequency[Number]; + } private: virtual bool runOnMachineFunction(MachineFunction&); @@ -99,8 +135,6 @@ private: virtual void releaseMemory(); void activate(unsigned); - void prepareNodes(const SmallVectorImpl<BlockConstraint>&); - void iterate(const SmallVectorImpl<unsigned>&); }; } // end namespace llvm diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index fd385824aff9..b6bbcd7176dd 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -11,6 +11,7 @@ #include "Spiller.h" #include "VirtRegMap.h" +#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,7 +25,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <set> using namespace llvm; @@ -180,11 +180,9 @@ public: VirtRegMap &vrm) : SpillerBase(pass, mf, vrm) {} - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &) { + void spill(LiveRangeEdit &LRE) { // Ignore spillIs - we don't use it. - trivialSpillEverywhere(li, newIntervals); + trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs()); } }; @@ -210,22 +208,22 @@ public: vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) { + void spill(LiveRangeEdit &LRE) { std::vector<LiveInterval*> added = - lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); - newIntervals.insert(newIntervals.end(), added.begin(), added.end()); + lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), + loopInfo, *vrm); + LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), + added.begin(), added.end()); // Update LiveStacks. - int SS = vrm->getStackSlot(li->reg); + int SS = vrm->getStackSlot(LRE.getReg()); if (SS == VirtRegMap::NO_STACK_SLOT) return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg); + const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); LiveInterval &SI = lss->getOrCreateInterval(SS, RC); if (!SI.hasAtLeastOneValue()) SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0)); + SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); } }; diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index f017583494ed..41f1727da439 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -12,11 +12,9 @@ namespace llvm { - class LiveInterval; + class LiveRangeEdit; class MachineFunction; class MachineFunctionPass; - class SlotIndex; - template <typename T> class SmallVectorImpl; class VirtRegMap; /// Spiller interface. @@ -27,16 +25,8 @@ namespace llvm { public: virtual ~Spiller() = 0; - /// spill - Spill the given live interval. The method used will depend on - /// the Spiller implementation selected. - /// - /// @param li The live interval to be spilled. - /// @param spillIs A list of intervals that are about to be spilled, - /// and so cannot be used for remat etc. - /// @param newIntervals The newly created intervals will be appended here. - virtual void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - const SmallVectorImpl<LiveInterval*> &spillIs) = 0; + /// spill - Spill the LRE.getParent() live interval. + virtual void spill(LiveRangeEdit &LRE) = 0; }; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index fd5d50b7ecb8..ac9d72bf62c9 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -16,12 +16,11 @@ #include "SplitKit.h" #include "LiveRangeEdit.h" #include "VirtRegMap.h" -#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -29,9 +28,8 @@ using namespace llvm; -static cl::opt<bool> -AllowSplit("spiller-splits-edges", - cl::desc("Allow critical edge splitting during spilling")); +STATISTIC(NumFinished, "Number of splits finished"); +STATISTIC(NumSimple, "Number of splits that were simple"); //===----------------------------------------------------------------------===// // Split Analysis @@ -45,49 +43,105 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, LIS(lis), Loops(mli), TII(*MF.getTarget().getInstrInfo()), - CurLI(0) {} + CurLI(0), + LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); - UsingInstrs.clear(); - UsingBlocks.clear(); - LiveBlocks.clear(); + UseBlocks.clear(); + ThroughBlocks.clear(); CurLI = 0; } -bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { - MachineBasicBlock *T, *F; - SmallVector<MachineOperand, 4> Cond; - return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); +SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { + const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); + std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; + + // Compute split points on the first call. The pair is independent of the + // current live interval. + if (!LSP.first.isValid()) { + MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); + if (FirstTerm == MBB->end()) + LSP.first = LIS.getMBBEndIdx(MBB); + else + LSP.first = LIS.getInstructionIndex(FirstTerm); + + // If there is a landing pad successor, also find the call instruction. + if (!LPad) + return LSP.first; + // There may not be a call instruction (?) in which case we ignore LPad. + LSP.second = LSP.first; + for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin(); + I != E; --I) + if (I->getDesc().isCall()) { + LSP.second = LIS.getInstructionIndex(I); + break; + } + } + + // If CurLI is live into a landing pad successor, move the last split point + // back to the call that may throw. + if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad)) + return LSP.second; + else + return LSP.first; } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. void SplitAnalysis::analyzeUses() { + assert(UseSlots.empty() && "Call clear first"); + + // First get all the defs from the interval values. This provides the correct + // slots for early clobbers. + for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(), + E = CurLI->vni_end(); I != E; ++I) + if (!(*I)->isPHIDef() && !(*I)->isUnused()) + UseSlots.push_back((*I)->def); + + // Get use slots form the use-def chain. const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg), - E = MRI.reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - if (MO.isUse() && MO.isUndef()) - continue; - MachineInstr *MI = MO.getParent(); - if (MI->isDebugValue() || !UsingInstrs.insert(MI)) - continue; - UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex()); - MachineBasicBlock *MBB = MI->getParent(); - UsingBlocks[MBB]++; - } + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; + ++I) + if (!I.getOperand().isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + array_pod_sort(UseSlots.begin(), UseSlots.end()); - calcLiveBlockInfo(); - DEBUG(dbgs() << " counted " - << UsingInstrs.size() << " instrs, " - << UsingBlocks.size() << " blocks.\n"); + + // Remove duplicates, keeping the smaller slot for each instruction. + // That is what we want for early clobbers. + UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(), + SlotIndex::isSameInstr), + UseSlots.end()); + + // Compute per-live block info. + if (!calcLiveBlockInfo()) { + // FIXME: calcLiveBlockInfo found inconsistencies in the live range. + // I am looking at you, SimpleRegisterCoalescing! + DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); + const_cast<LiveIntervals&>(LIS) + .shrinkToUses(const_cast<LiveInterval*>(CurLI)); + UseBlocks.clear(); + ThroughBlocks.clear(); + bool fixed = calcLiveBlockInfo(); + (void)fixed; + assert(fixed && "Couldn't fix broken live interval"); + } + + DEBUG(dbgs() << "Analyze counted " + << UseSlots.size() << " instrs in " + << UseBlocks.size() << " blocks, through " + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. -void SplitAnalysis::calcLiveBlockInfo() { +bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = 0; if (CurLI->empty()) - return; + return true; LiveInterval::const_iterator LVI = CurLI->begin(); LiveInterval::const_iterator LVE = CurLI->end(); @@ -104,24 +158,14 @@ void SplitAnalysis::calcLiveBlockInfo() { SlotIndex Start, Stop; tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - // The last split point is the latest possible insertion point that dominates - // all successor blocks. If interference reaches LastSplitPoint, it is not - // possible to insert a split or reload that makes CurLI live in the - // outgoing bundle. - MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); - if (LSP == BI.MBB->end()) - BI.LastSplitPoint = Stop; - else - BI.LastSplitPoint = LIS.getInstructionIndex(LSP); - // LVI is the first live segment overlapping MBB. BI.LiveIn = LVI->start <= Start; if (!BI.LiveIn) BI.Def = LVI->start; // Find the first and last uses in the block. - BI.Uses = hasUses(MFI); - if (BI.Uses && UseI != UseE) { + bool Uses = UseI != UseE && *UseI < Stop; + if (Uses) { BI.FirstUse = *UseI; assert(BI.FirstUse >= Start); do ++UseI; @@ -149,7 +193,16 @@ void SplitAnalysis::calcLiveBlockInfo() { // Don't set LiveThrough when the block has a gap. BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; - LiveBlocks.push_back(BI); + if (Uses) + UseBlocks.push_back(BI); + else { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + } + // FIXME: This should never happen. The live range stops or starts without a + // corresponding use. An earlier pass did something wrong. + if (!BI.LiveThrough && !Uses) + return false; // LVI is now at LVE or LVI->end >= Stop. if (LVI == LVE) @@ -165,6 +218,30 @@ void SplitAnalysis::calcLiveBlockInfo() { else MFI = LIS.getMBBFromIndex(LVI->start); } + return true; +} + +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { @@ -181,15 +258,6 @@ bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { return I != Orig.begin() && (--I)->end == Idx; } -void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { - for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { - unsigned count = UsingBlocks.lookup(*I); - OS << " BB#" << (*I)->getNumber(); - if (count) - OS << '(' << count << ')'; - } -} - void SplitAnalysis::analyze(const LiveInterval *li) { clear(); CurLI = li; @@ -198,171 +266,242 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// -// LiveIntervalMap +// Split Editor //===----------------------------------------------------------------------===// -// Work around the fact that the std::pair constructors are broken for pointer -// pairs in some implementations. makeVV(x, 0) works. -static inline std::pair<const VNInfo*, VNInfo*> -makeVV(const VNInfo *a, VNInfo *b) { - return std::make_pair(a, b); -} +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(0), + OpenIdx(0), + RegAssign(Allocator) +{} -void LiveIntervalMap::reset(LiveInterval *li) { - LI = li; +void SplitEditor::reset(LiveRangeEdit &lre) { + Edit = &lre; + OpenIdx = 0; + RegAssign.clear(); Values.clear(); - LiveOutCache.clear(); + + // We don't need to clear LiveOutCache, only LiveOutSeen entries are read. + LiveOutSeen.clear(); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(LIS, TII, 0); } -bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const { - ValueMap::const_iterator i = Values.find(ParentVNI); - return i != Values.end() && i->second == 0; +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; } -// defValue - Introduce a LI def for ParentVNI that could be later than -// ParentVNI->def. -VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { - assert(LI && "call reset first"); +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = Edit->get(RegIdx); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - // Preserve the PHIDef bit. - if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) - VNI->setIsPHIDef(true); - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair<ValueMap::iterator,bool> InsP = - Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0)); + std::pair<ValueMap::iterator, bool> InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI)); + + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; - // This is now a complex def. Mark with a NULL in valueMap. - if (!InsP.second) + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second) { + SlotIndex Def = OldVNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + // No longer a simple mapping. InsP.first->second = 0; + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } - -// mapValue - Find the mapped value for ParentVNI at Idx. -// Potentially create phi-def values. -VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, - bool *simple) { - assert(LI && "call reset first"); +void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) { assert(ParentVNI && "Mapping NULL value"); - assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)]; - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair<ValueMap::iterator,bool> InsP = - Values.insert(makeVV(ParentVNI, 0)); - - // This was an unknown value. Create a simple mapping. - if (InsP.second) { - if (simple) *simple = true; - return InsP.first->second = LI->createValueCopy(ParentVNI, - LIS.getVNInfoAllocator()); - } + // ParentVNI was either unmapped or already complex mapped. Either way. + if (!VNI) + return; - // This was a simple mapped value. - if (InsP.first->second) { - if (simple) *simple = true; - return InsP.first->second; - } + // This was previously a single mapping. Make sure the old def is represented + // by a trivial live range. + SlotIndex Def = VNI->def; + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + VNI = 0; +} - // This is a complex mapped value. There may be multiple defs, and we may need - // to create phi-defs. - if (simple) *simple = false; +// extendRange - Extend the live range to reach Idx. +// Potentially create phi-def values. +void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { + assert(Idx.isValid() && "Invalid SlotIndex"); MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); assert(IdxMBB && "No MBB at Idx"); + LiveInterval *LI = Edit->get(RegIdx); // Is there a def in the same MBB we can extend? - if (VNInfo *VNI = extendTo(IdxMBB, Idx)) - return VNI; + if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx)) + return; // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. - DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber() - << " at " << Idx << " in " << *LI << '\n'); + // VNInfo. + VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); + + // When there were multiple different values, we may need new PHIs. + if (!VNI) + return updateSSA(); + + // Poor man's SSA update for the single-value case. + LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + LiveOutCache[MBB] = LOP; + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } + } +} + +/// findReachingDefs - Search the CFG for known live-out values. +/// Add required live-in blocks to LiveInBlocks. +VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill) { + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } // Blocks where LI should be live-in. - SmallVector<MachineDomTreeNode*, 16> LiveIn; - LiveIn.push_back(MDT[IdxMBB]); + SmallVector<MachineBasicBlock*, 16> WorkList(1, KillMBB); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *TheVNI = 0; // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != LiveIn.size(); ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; + assert(!MBB->pred_empty() && "Value live-in to entry block?"); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; + LiveOutPair &LOP = LiveOutCache[Pred]; + // Is this a known live-out block? - std::pair<LiveOutMap::iterator,bool> LOIP = - LiveOutCache.insert(std::make_pair(Pred, LiveOutPair())); - // Yes, we have been here before. - if (!LOIP.second) { - DEBUG(if (VNInfo *VNI = LOIP.first->second.first) - dbgs() << " known valno #" << VNI->id - << " at BB#" << Pred->getNumber() << '\n'); + if (LiveOutSeen.test(Pred->getNumber())) { + if (VNInfo *VNI = LOP.first) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + } continue; } + // First time. LOP is garbage and must be cleared below. + LiveOutSeen.set(Pred->getNumber()); + // Does Pred provide a live-out value? - SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot(); - if (VNInfo *VNI = extendTo(Pred, Last)) { - MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def); - DEBUG(dbgs() << " found valno #" << VNI->id - << " from BB#" << DefMBB->getNumber() - << " at BB#" << Pred->getNumber() << '\n'); - LiveOutPair &LOP = LOIP.first->second; - LOP.first = VNI; - LOP.second = MDT[DefMBB]; + SlotIndex Start, Last; + tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred); + Last = Last.getPrevSlot(); + VNInfo *VNI = LI->extendInBlock(Start, Last); + LOP.first = VNI; + if (VNI) { + LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; continue; } + LOP.second = 0; + // No, we need a live-in value for Pred as well - if (Pred != IdxMBB) - LiveIn.push_back(MDT[Pred]); + if (Pred != KillMBB) + WorkList.push_back(Pred); + else + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); } } - // We may need to add phi-def values to preserve the SSA form. + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveInBlocks.clear(); + LiveInBlocks.reserve(WorkList.size()); + while(!WorkList.empty()) + LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); + + // The kill block may not be live-through. + assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); + LiveInBlocks.back().Kill = Kill; + + return UniqueVNI ? TheVNI : 0; +} + +void SplitEditor::updateSSA() { // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. - VNInfo *IdxVNI = 0; unsigned Changes; do { Changes = 0; - DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n"); - // Propagate live-out values down the dominator tree, inserting phi-defs when - // necessary. Since LiveIn was created by a BFS, going backwards makes it more - // likely for us to visit immediate dominators before their children. - for (unsigned i = LiveIn.size(); i; --i) { - MachineDomTreeNode *Node = LiveIn[i-1]; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. + if (!Node) + continue; MachineBasicBlock *MBB = Node->getBlock(); MachineDomTreeNode *IDom = Node->getIDom(); LiveOutPair IDomValue; + // We need a live-in value to a block with no immediate dominator? // This is probably an unreachable block that has survived somehow. - bool needPHI = !IDom; + bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - // Get the IDom live-out value. - if (!needPHI) { - LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock()); - if (I != LiveOutCache.end()) - IDomValue = I->second; - else - // If IDom is outside our set of live-out blocks, there must be new - // defs, and we need a phi-def here. - needPHI = true; - } - - // IDom dominates all of our predecessors, but it may not be the immediate - // dominator. Check if any of them have live-out values that are properly - // dominated by IDom. If so, we need a phi-def here. + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { + IDomValue = LiveOutCache[IDom->getBlock()]; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { LiveOutPair Value = LiveOutCache[*PI]; @@ -378,215 +517,57 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, } } + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOutCache indicates a foreign or missing value. + LiveOutPair &LOP = LiveOutCache[MBB]; + // Create a phi-def if required. if (needPHI) { ++Changes; SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); VNI->setIsPHIDef(true); - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " phi-def #" << VNI->id << " at " << Start << '\n'); - // We no longer need LI to be live-in. - LiveIn.erase(LiveIn.begin()+(i-1)); - // Blocks in LiveIn are either IdxMBB, or have a value live-through. - if (MBB == IdxMBB) - IdxVNI = VNI; - // Check if we need to update live-out info. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I == LiveOutCache.end() || I->second.second == Node) { - // We already have a live-out defined in MBB, so this must be IdxMBB. - assert(MBB == IdxMBB && "Adding phi-def to known live-out"); - LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); - } else { - // This phi-def is also live-out, so color the whole block. + I->Value = VNI; + // This block is done, we know the final value. + I->DomNode = 0; + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - I->second = LiveOutPair(VNI, Node); + LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { - // No phi-def here. Remember incoming value for IdxMBB. - if (MBB == IdxMBB) - IdxVNI = IDomValue.first; + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + if (I->Kill.isValid()) + continue; // Propagate IDomValue if needed: // MBB is live-out and doesn't define its own value. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I != LiveOutCache.end() && I->second.second != Node && - I->second.first != IDomValue.first) { + if (LOP.second != Node && LOP.first != IDomValue.first) { ++Changes; - I->second = IDomValue; - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " idom valno #" << IDomValue.first->id - << " from BB#" << IDom->getBlock()->getNumber() << '\n'); + LOP = IDomValue; } } } - DEBUG(dbgs() << " - made " << Changes << " changes.\n"); } while (Changes); - assert(IdxVNI && "Didn't find value for Idx"); - -#ifndef NDEBUG - // Check the LiveOutCache invariants. - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - if (I->second.second->getBlock() == I->first) - continue; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant"); - } -#endif - - // Since we went through the trouble of a full BFS visiting all reaching defs, - // the values in LiveIn are now accurate. No more phi-defs are needed + // The values in LiveInBlocks are now accurate. No more phi-defs are needed // for these blocks, so we can color the live ranges. - // This makes the next mapValue call much faster. - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (SmallVectorImpl<LiveInBlock>::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + if (!I->DomNode) + continue; + assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I->DomNode->getBlock(); SlotIndex Start = LIS.getMBBStartIdx(MBB); - VNInfo *VNI = LiveOutCache.lookup(MBB).first; - - // Anything in LiveIn other than IdxMBB is live-through. - // In IdxMBB, we should stop at Idx unless the same value is live-out. - if (MBB == IdxMBB && IdxVNI != VNI) - LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); - else - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); + LI->addRange(LiveRange(Start, I->Kill.isValid() ? + I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); } - - return IdxVNI; -} - -#ifndef NDEBUG -void LiveIntervalMap::dumpCache() { - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - dbgs() << " cache: BB#" << I->first->getNumber() - << " has valno #" << I->second.first->id << " from BB#" - << I->second.second->getBlock()->getNumber() << ", preds"; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - dbgs() << " BB#" << (*PI)->getNumber(); - dbgs() << '\n'; - } - dbgs() << " cache: " << LiveOutCache.size() << " entries.\n"; -} -#endif - -// extendTo - Find the last LI value defined in MBB at or before Idx. The -// ParentLI is assumed to be live at Idx. Extend the live range to Idx. -// Return the found VNInfo, or NULL. -VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) { - assert(LI && "call reset first"); - LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx); - if (I == LI->begin()) - return 0; - --I; - if (I->end <= LIS.getMBBStartIdx(MBB)) - return 0; - if (I->end <= Idx) - I->end = Idx.getNextSlot(); - return I->valno; -} - -// addSimpleRange - Add a simple range from ParentLI to LI. -// ParentVNI must be live in the [Start;End) interval. -void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End, - const VNInfo *ParentVNI) { - assert(LI && "call reset first"); - bool simple; - VNInfo *VNI = mapValue(ParentVNI, Start, &simple); - // A simple mapping is easy. - if (simple) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // ParentVNI is a complex value. We must map per MBB. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot()); - - if (MBB == MBBE) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // First block. - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - - // Run sequence of full blocks. - for (++MBB; MBB != MBBE; ++MBB) { - Start = LIS.getMBBStartIdx(MBB); - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), - mapValue(ParentVNI, Start))); - } - - // Final block. - Start = LIS.getMBBStartIdx(MBB); - if (Start != End) - LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); -} - -/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. -/// All needed values whose def is not inside [Start;End) must be defined -/// beforehand so mapValue will work. -void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { - assert(LI && "call reset first"); - LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end(); - LiveInterval::const_iterator I = std::lower_bound(B, E, Start); - - // Check if --I begins before Start and overlaps. - if (I != B) { - --I; - if (I->end > Start) - addSimpleRange(Start, std::min(End, I->end), I->valno); - ++I; - } - - // The remaining ranges begin after Start. - for (;I != E && I->start < End; ++I) - addSimpleRange(I->start, std::min(End, I->end), I->valno); -} - - -//===----------------------------------------------------------------------===// -// Split Editor -//===----------------------------------------------------------------------===// - -/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &sa, - LiveIntervals &lis, - VirtRegMap &vrm, - MachineDominatorTree &mdt, - LiveRangeEdit &edit) - : SA(sa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), - TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), - Edit(edit), - OpenIdx(0), - RegAssign(Allocator) -{ - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. - Edit.anyRematerializable(LIS, TII, 0); -} - -void SplitEditor::dump() const { - if (RegAssign.empty()) { - dbgs() << " empty\n"; - return; - } - - for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) - dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); - dbgs() << '\n'; } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -596,51 +577,53 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit.get(RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) - .addReg(Edit.getReg()); - Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + .addReg(Edit->getReg()); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getDefIndex(); } // Define the value in Reg. - VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); VNI->setCopy(CopyMI); - - // Add minimal liveness for the new value. - Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } /// Create a new virtual register and live interval. -void SplitEditor::openIntv() { - assert(!OpenIdx && "Previous LI not closed before openIntv"); - +unsigned SplitEditor::openIntv() { // Create the complement as index 0. - if (Edit.empty()) { - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers.back().reset(Edit.get(0)); - } + if (Edit->empty()) + Edit->create(LIS, VRM); // Create the open interval. - OpenIdx = Edit.size(); - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers[OpenIdx].reset(Edit.get(OpenIdx)); + OpenIdx = Edit->size(); + Edit->create(LIS, VRM); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvBefore"); DEBUG(dbgs() << " enterIntvBefore " << Idx); Idx = Idx.getBaseIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx; @@ -658,14 +641,14 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return End; } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit.getParent(), &MBB)); + LIS.getLastSplitPoint(Edit->getParent(), &MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -689,7 +672,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { // The interval must be live beyond the instruction at Idx. Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -709,7 +692,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { // The interval must be live into the instruction at Idx. Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -727,7 +710,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { SlotIndex Start = LIS.getMBBStartIdx(&MBB); DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Start; @@ -742,30 +725,169 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); - assert(Edit.getParent().getVNInfoAt(Start) == - Edit.getParent().getVNInfoAt(End.getPrevSlot()) && + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && "Parent changes value in extended range"); - assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); - // Treat this as useIntv() for now. The complement interval will be extended - // as needed by mapValue(). + // The complement interval will be extended as needed by extendRange(). + if (ParentVNI) + markComplexMapped(0, ParentVNI); DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); DEBUG(dump()); } -/// closeIntv - Indicate that we are done editing the currently open -/// LiveInterval, and ranges can be trimmed. -void SplitEditor::closeIntv() { - assert(OpenIdx && "openIntv not called before closeIntv"); - OpenIdx = 0; +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need extendRange(). +bool SplitEditor::transferValues() { + bool Skipped = false; + LiveInBlocks.clear(); + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), + ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { + DEBUG(dbgs() << " blit " << *ParentI << ':'); + VNInfo *ParentVNI = ParentI->valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = ParentI->start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = ParentI->end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. + if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { + DEBUG(dbgs() << ':' << VNI->id); + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip rematerialized values, we need to use extendRange() and + // extendPHIKillRanges() to completely recompute the live ranges. + if (Edit->didRematerialize(ParentVNI)) { + DEBUG(dbgs() << "(remat)"); + Skipped = true; + Start = End; + continue; + } + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) { + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) { + // Live-out as well. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + } else { + // This block needs a live-in value. + LiveInBlocks.push_back(MDT[MBB]); + // The last block covered may not be live-out. + if (End < BlockEnd) + LiveInBlocks.back().Kill = End; + else { + // Live-out, but we need updateSSA to tell us the value. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, + (MachineDomTreeNode*)0); + } + } + BlockStart = BlockEnd; + ++MBB; + } + Start = End; + } while (Start != ParentI->end); + DEBUG(dbgs() << '\n'); + } + + if (!LiveInBlocks.empty()) + updateSSA(); + + return Skipped; +} + +void SplitEditor::extendPHIKillRanges() { + // Extend live ranges to be live-out for successor PHI values. + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { + const VNInfo *PHIVNI = *I; + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (Edit->getParent().liveAt(End)) { + assert(RegAssign.lookup(End) == RegIdx && + "Different register assignment in phi predecessor"); + extendRange(RegIdx, End); + } + } + } } -/// rewriteAssigned - Rewrite all uses of Edit.getReg(). -void SplitEditor::rewriteAssigned() { - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()), +/// rewriteAssigned - Rewrite all uses of Edit->getReg(). +void SplitEditor::rewriteAssigned(bool ExtendRanges) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); @@ -780,147 +902,145 @@ void SplitEditor::rewriteAssigned() { // <undef> operands don't really read the register, so just assign them to // the complement. if (MO.isUse() && MO.isUndef()) { - MO.setReg(Edit.get(0)->reg); + MO.setReg(Edit->get(0)->reg); continue; } SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + if (MO.isDef()) + Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - MO.setReg(Edit.get(RegIdx)->reg); + MO.setReg(Edit->get(RegIdx)->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); - // Extend liveness to Idx. - const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); - LIMappers[RegIdx].mapValue(ParentVNI, Idx); + // Extend liveness to Idx if the instruction reads reg. + if (!ExtendRanges) + continue; + + // Skip instructions that don't read Reg. + if (MO.isDef()) { + if (!MO.getSubReg() && !MO.isEarlyClobber()) + continue; + // We may wan't to extend a live range for a partial redef, or for a use + // tied to an early clobber. + Idx = Idx.getPrevSlot(); + if (!Edit->getParent().liveAt(Idx)) + continue; + } else + Idx = Idx.getUseIndex(); + + extendRange(RegIdx, Idx); } } -/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping. -void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, - const ConnectedVNInfoEqClasses &ConEq) { - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg), - RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); - MachineInstr *MI = MO.getParent(); - ++RI; - if (MO.isUse() && MO.isUndef()) - continue; - // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); - DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' - << Idx << ':'); - const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx); - assert(VNI && "Interval not live at use."); - MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg); - DEBUG(dbgs() << VNI->id << '\t' << *MI); +void SplitEditor::deleteRematVictims() { + SmallVector<MachineInstr*, 8> Dead; + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ + LiveInterval *LI = *I; + for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); + LII != LIE; ++LII) { + // Dead defs end at the store slot. + if (LII->end != LII->valno->def.getNextSlot()) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + assert(MI && "Missing instruction for dead def"); + MI->addRegisterDead(LI->reg, &TRI); + + if (!MI->allDefsAreDead()) + continue; + + DEBUG(dbgs() << "All defs dead: " << *MI); + Dead.push_back(MI); + } } + + if (Dead.empty()) + return; + + Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); } -void SplitEditor::finish() { - assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { + ++NumFinished; // At this point, the live intervals in Edit contain VNInfos corresponding to // the inserted copies. // Add the original defs from the parent interval. - for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { const VNInfo *ParentVNI = *I; if (ParentVNI->isUnused()) continue; - LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)]; - VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def); - LIM.getLI()->addRange(LiveRange(ParentVNI->def, - ParentVNI->def.getNextSlot(), VNI)); - // Mark all values as complex to force liveness computation. - // This should really only be necessary for remat victims, but we are lazy. - LIM.markComplexMapped(ParentVNI); + unsigned RegIdx = RegAssign.lookup(ParentVNI->def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); + VNI->setIsPHIDef(ParentVNI->isPHIDef()); + VNI->setCopy(ParentVNI->getCopy()); + + // Mark rematted values as complex everywhere to force liveness computation. + // The new live ranges may be truncated. + if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + markComplexMapped(i, ParentVNI); } #ifndef NDEBUG // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); #endif - // FIXME: Don't recompute the liveness of all values, infer it from the - // overlaps between the parent live interval and RegAssign. - // The mapValue algorithm is only necessary when: - // - The parent value maps to multiple defs, and new phis are needed, or - // - The value has been rematerialized before some uses, and we want to - // minimize the live range so it only reaches the remaining uses. - // All other values have simple liveness that can be computed from RegAssign - // and the parent live interval. - - // Extend live ranges to be live-out for successor PHI values. - for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { - const VNInfo *PHIVNI = *I; - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveIntervalMap &LIM = LIMappers[RegIdx]; - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def - << " -> " << RegIdx << '\n'); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); - DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End); - // The predecessor may not have a live-out value. That is OK, like an - // undef PHI operand. - if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) { - DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n"); - assert(RegAssign.lookup(End) == RegIdx && - "Different register assignment in phi predecessor"); - LIM.mapValue(VNI, End); - } - else - DEBUG(dbgs() << " is not live-out\n"); - } - DEBUG(dbgs() << " " << *LIM.getLI() << '\n'); - } + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) + extendPHIKillRanges(); + else + ++NumSimple; - // Rewrite instructions. - rewriteAssigned(); + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); - // FIXME: Delete defs that were rematted everywhere. + // Delete defs that were rematted everywhere. + if (Skipped) + deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) (*I)->RenumberValues(LIS); + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + // Now check if any registers were separated into multiple components. ConnectedVNInfoEqClasses ConEQ(LIS); - for (unsigned i = 0, e = Edit.size(); i != e; ++i) { + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit.get(i); + LiveInterval *li = Edit->get(i); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); SmallVector<LiveInterval*, 8> dups; dups.push_back(li); - for (unsigned i = 1; i != NumComp; ++i) - dups.push_back(&Edit.create(MRI, LIS, VRM)); - rewriteComponents(dups, ConEQ); - ConEQ.Distribute(&dups[0]); + for (unsigned j = 1; j != NumComp; ++j) + dups.push_back(&Edit->create(LIS, VRM)); + ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); } // Calculate spill weight and allocation hints for new intervals. - VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops); - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){ - LiveInterval &li = **I; - vrai.CalculateRegClass(li.reg); - vrai.CalculateWeightAndHint(li); - DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName() - << ":" << li << '\n'); - } + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + + assert(!LRMap || LRMap->size() == Edit->size()); } @@ -932,113 +1052,42 @@ void SplitEditor::finish() { /// may be an advantage to split CurLI for the duration of the block. bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { // If CurLI is local to one block, there is no point to splitting it. - if (LiveBlocks.size() <= 1) + if (UseBlocks.size() <= 1) return false; // Add blocks with multiple uses. - for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) { - const BlockInfo &BI = LiveBlocks[i]; - if (!BI.Uses) - continue; - unsigned Instrs = UsingBlocks.lookup(BI.MBB); - if (Instrs <= 1) - continue; - if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough) + for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) { + const BlockInfo &BI = UseBlocks[i]; + if (BI.FirstUse == BI.LastUse) continue; Blocks.insert(BI.MBB); } return !Blocks.empty(); } -/// splitSingleBlocks - Split CurLI into a separate live interval inside each -/// basic block in Blocks. -void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { - DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); - - for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) { - const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i]; - if (!BI.Uses || !Blocks.count(BI.MBB)) - continue; - - openIntv(); - SlotIndex SegStart = enterIntvBefore(BI.FirstUse); - if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) { - useIntv(SegStart, leaveIntvAfter(BI.LastUse)); - } else { +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + } else { // The last use is after the last valid split point. - SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint); - useIntv(SegStart, SegStop); - overlapIntv(SegStop, BI.LastUse); - } - closeIntv(); + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastUse); } - finish(); } - -//===----------------------------------------------------------------------===// -// Sub Block Splitting -//===----------------------------------------------------------------------===// - -/// getBlockForInsideSplit - If CurLI is contained inside a single basic block, -/// and it wou pay to subdivide the interval inside that block, return it. -/// Otherwise return NULL. The returned block can be passed to -/// SplitEditor::splitInsideBlock. -const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() { - // The interval must be exclusive to one block. - if (UsingBlocks.size() != 1) - return 0; - // Don't to this for less than 4 instructions. We want to be sure that - // splitting actually reduces the instruction count per interval. - if (UsingInstrs.size() < 4) - return 0; - return UsingBlocks.begin()->first; -} - -/// splitInsideBlock - Split CurLI into multiple intervals inside MBB. -void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) { - SmallVector<SlotIndex, 32> Uses; - Uses.reserve(SA.UsingInstrs.size()); - for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(), - E = SA.UsingInstrs.end(); I != E; ++I) - if ((*I)->getParent() == MBB) - Uses.push_back(LIS.getInstructionIndex(*I)); - DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for " - << Uses.size() << " instructions.\n"); - assert(Uses.size() >= 3 && "Need at least 3 instructions"); - array_pod_sort(Uses.begin(), Uses.end()); - - // Simple algorithm: Find the largest gap between uses as determined by slot - // indices. Create new intervals for instructions before the gap and after the - // gap. - unsigned bestPos = 0; - int bestGap = 0; - DEBUG(dbgs() << " dist (" << Uses[0]); - for (unsigned i = 1, e = Uses.size(); i != e; ++i) { - int g = Uses[i-1].distance(Uses[i]); - DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]); - if (g > bestGap) - bestPos = i, bestGap = g; - } - DEBUG(dbgs() << "), best: -" << bestGap << "-\n"); - - // bestPos points to the first use after the best gap. - assert(bestPos > 0 && "Invalid gap"); - - // FIXME: Don't create intervals for low densities. - - // First interval before the gap. Don't create single-instr intervals. - if (bestPos > 1) { - openIntv(); - useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1])); - closeIntv(); - } - - // Second interval after the gap. - if (bestPos < Uses.size()-1) { - openIntv(); - useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back())); - closeIntv(); +/// splitSingleBlocks - Split CurLI into a separate live interval inside each +/// basic block in Blocks. +void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { + DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA.getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (Blocks.count(BI.MBB)) + splitSingleBlock(BI); } - finish(); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index e02e6297035d..2ae760a58da5 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,7 +12,13 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_SPLITKIT_H +#define LLVM_CODEGEN_SPLITKIT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -48,17 +54,9 @@ public: const MachineLoopInfo &Loops; const TargetInstrInfo &TII; - // Instructions using the the current register. - typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet; - InstrPtrSet UsingInstrs; - // Sorted slot indexes of using instructions. SmallVector<SlotIndex, 8> UseSlots; - // The number of instructions using CurLI in each basic block. - typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap; - BlockCountMap UsingBlocks; - /// Additional information about basic blocks where the current variable is /// live. Such a block will look like one of these templates: /// @@ -75,35 +73,37 @@ public: SlotIndex LastUse; ///< Last instr using current reg. SlotIndex Kill; ///< Interval end point inside block. SlotIndex Def; ///< Interval start point inside block. - /// Last possible point for splitting live ranges. - SlotIndex LastSplitPoint; - bool Uses; ///< Current reg has uses or defs in block. bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. - - // Per-interference pattern scratch data. - bool OverlapEntry; ///< Interference overlaps entering interval. - bool OverlapExit; ///< Interference overlaps exiting interval. }; - /// Basic blocks where var is live. This array is parallel to - /// SpillConstraints. - SmallVector<BlockInfo, 8> LiveBlocks; - private: // Current live interval. const LiveInterval *CurLI; + /// LastSplitPoint - Last legal split point in each basic block in the current + /// function. The first entry is the first terminator, the second entry is the + /// last valid split point for a variable that is live in to a landing pad + /// successor. + SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint; + + /// UseBlocks - Blocks where CurLI has uses. + SmallVector<BlockInfo, 8> UseBlocks; + + /// ThroughBlocks - Block numbers where CurLI is live through without uses. + BitVector ThroughBlocks; + + /// NumThroughBlocks - Number of live-through blocks. + unsigned NumThroughBlocks; + + SlotIndex computeLastSplitPoint(unsigned Num); + // Sumarize statistics by counting instructions using CurLI. void analyzeUses(); /// calcLiveBlockInfo - Compute per-block information about CurLI. - void calcLiveBlockInfo(); - - /// canAnalyzeBranch - Return true if MBB ends in a branch that can be - /// analyzed. - bool canAnalyzeBranch(const MachineBasicBlock *MBB); + bool calcLiveBlockInfo(); public: SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, @@ -120,9 +120,14 @@ public: /// getParent - Return the last analyzed interval. const LiveInterval &getParent() const { return *CurLI; } - /// hasUses - Return true if MBB has any uses of CurLI. - bool hasUses(const MachineBasicBlock *MBB) const { - return UsingBlocks.lookup(MBB); + /// getLastSplitPoint - Return that base index of the last valid split point + /// in the basic block numbered Num. + SlotIndex getLastSplitPoint(unsigned Num) { + // Inline the common simple case. + if (LastSplitPoint[Num].first.isValid() && + !LastSplitPoint[Num].second.isValid()) + return LastSplitPoint[Num].first; + return computeLastSplitPoint(Num); } /// isOriginalEndpoint - Return true if the original live range was killed or @@ -132,127 +137,30 @@ public: /// splitting. bool isOriginalEndpoint(SlotIndex Idx) const; - typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - - // Print a set of blocks with use counts. - void print(const BlockPtrSet&, raw_ostream&) const; - - /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from - /// having CurLI split to a new live interval. Return true if Blocks can be - /// passed to SplitEditor::splitSingleBlocks. - bool getMultiUseBlocks(BlockPtrSet &Blocks); - - /// getBlockForInsideSplit - If CurLI is contained inside a single basic - /// block, and it would pay to subdivide the interval inside that block, - /// return it. Otherwise return NULL. The returned block can be passed to - /// SplitEditor::splitInsideBlock. - const MachineBasicBlock *getBlockForInsideSplit(); -}; - + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks + /// where CurLI has uses. + ArrayRef<BlockInfo> getUseBlocks() { return UseBlocks; } -/// LiveIntervalMap - Map values from a large LiveInterval into a small -/// interval that is a subset. Insert phi-def values as needed. This class is -/// used by SplitEditor to create new smaller LiveIntervals. -/// -/// ParentLI is the larger interval, LI is the subset interval. Every value -/// in LI corresponds to exactly one value in ParentLI, and the live range -/// of the value is contained within the live range of the ParentLI value. -/// Values in ParentLI may map to any number of OpenLI values, including 0. -class LiveIntervalMap { - LiveIntervals &LIS; - MachineDominatorTree &MDT; + /// getNumThroughBlocks - Return the number of through blocks. + unsigned getNumThroughBlocks() const { return NumThroughBlocks; } - // The parent interval is never changed. - const LiveInterval &ParentLI; + /// isThroughBlock - Return true if CurLI is live through MBB without uses. + bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } - // The child interval's values are fully contained inside ParentLI values. - LiveInterval *LI; + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } - typedef DenseMap<const VNInfo*, VNInfo*> ValueMap; + /// countLiveBlocks - Return the number of blocks where li is live. + /// This is guaranteed to return the same number as getNumThroughBlocks() + + /// getUseBlocks().size() after calling analyze(li). + unsigned countLiveBlocks(const LiveInterval *li) const; - // Map ParentLI values to simple values in LI that are defined at the same - // SlotIndex, or NULL for ParentLI values that have complex LI defs. - // Note there is a difference between values mapping to NULL (complex), and - // values not present (unknown/unmapped). - ValueMap Values; - - typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; - typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap; - - // LiveOutCache - Map each basic block where LI is live out to the live-out - // value and its defining block. One of these conditions shall be true: - // - // 1. !LiveOutCache.count(MBB) - // 2. LiveOutCache[MBB].second.getNode() == MBB - // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] - // - // This is only a cache, the values can be computed as: - // - // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB)) - // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] - // - // The cache is also used as a visiteed set by mapValue(). - LiveOutMap LiveOutCache; - - // Dump the live-out cache to dbgs(). - void dumpCache(); - -public: - LiveIntervalMap(LiveIntervals &lis, - MachineDominatorTree &mdt, - const LiveInterval &parentli) - : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {} - - /// reset - clear all data structures and start a new live interval. - void reset(LiveInterval *); - - /// getLI - return the current live interval. - LiveInterval *getLI() const { return LI; } - - /// defValue - define a value in LI from the ParentLI value VNI and Idx. - /// Idx does not have to be ParentVNI->def, but it must be contained within - /// ParentVNI's live range in ParentLI. - /// Return the new LI value. - VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx); - - /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is - /// assumed that ParentVNI is live at Idx. - /// If ParentVNI has not been defined by defValue, it is assumed that - /// ParentVNI->def dominates Idx. - /// If ParentVNI has been defined by defValue one or more times, a value that - /// dominates Idx will be returned. This may require creating extra phi-def - /// values and adding live ranges to LI. - /// If simple is not NULL, *simple will indicate if ParentVNI is a simply - /// mapped value. - VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0); - - // extendTo - Find the last LI value defined in MBB at or before Idx. The - // parentli is assumed to be live at Idx. Extend the live range to include - // Idx. Return the found VNInfo, or NULL. - VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx); - - /// isMapped - Return true is ParentVNI is a known mapped value. It may be a - /// simple 1-1 mapping or a complex mapping to later defs. - bool isMapped(const VNInfo *ParentVNI) const { - return Values.count(ParentVNI); - } - - /// isComplexMapped - Return true if ParentVNI has received new definitions - /// with defValue. - bool isComplexMapped(const VNInfo *ParentVNI) const; - - /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the - /// number of definitions. - void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; } - - // addSimpleRange - Add a simple range from ParentLI to LI. - // ParentVNI must be live in the [Start;End) interval. - void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); + typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. - /// All needed values whose def is not inside [Start;End) must be defined - /// beforehand so mapValue will work. - void addRange(SlotIndex Start, SlotIndex End); + /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from + /// having CurLI split to a new live interval. Return true if Blocks can be + /// passed to SplitEditor::splitSingleBlocks. + bool getMultiUseBlocks(BlockPtrSet &Blocks); }; @@ -277,7 +185,7 @@ class SplitEditor { const TargetRegisterInfo &TRI; /// Edit - The current parent register and new intervals created. - LiveRangeEdit &Edit; + LiveRangeEdit *Edit; /// Index into Edit of the currently open interval. /// The index 0 is used for the complement, so the first interval started by @@ -295,8 +203,76 @@ class SplitEditor { /// Idx. RegAssignMap RegAssign; - /// LIMappers - One LiveIntervalMap or each interval in Edit. - SmallVector<LiveIntervalMap, 4> LIMappers; + typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap; + + /// Values - keep track of the mapping from parent values to values in the new + /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains: + /// + /// 1. No entry - the value is not mapped to Edit.get(RegIdx). + /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx). + /// Each value is represented by a minimal live range at its def. + /// 3. A non-null VNInfo - the value is mapped to a single new value. + /// The new value has no live ranges anywhere. + ValueMap Values; + + typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; + + // LiveOutCache - Map each basic block where a new register is live out to the + // live-out value and its defining block. + // One of these conditions shall be true: + // + // 1. !LiveOutCache.count(MBB) + // 2. LiveOutCache[MBB].second.getNode() == MBB + // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] + // + // This is only a cache, the values can be computed as: + // + // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB)) + // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] + // + // The cache is also used as a visited set by extendRange(). It can be shared + // by all the new registers because at most one is live out of each block. + LiveOutMap LiveOutCache; + + // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid + // entry in LiveOutCache. + BitVector LiveOutSeen; + + /// LiveInBlock - Info for updateSSA() about a block where a register is + /// live-in. + /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA() + /// adds the computed live-in value. + struct LiveInBlock { + // Dominator tree node for the block. + // Cleared by updateSSA when the final value has been determined. + MachineDomTreeNode *DomNode; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. + SlotIndex Kill; + + LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {} + }; + + /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and + /// updateSSA(). This list is usually empty, it exists here to avoid frequent + /// reallocations. + SmallVector<LiveInBlock, 16> LiveInBlocks; + + /// defValue - define a value in RegIdx from ParentVNI at Idx. + /// Idx does not have to be ParentVNI->def, but it must be contained within + /// ParentVNI's live range in ParentLI. The new value is added to the value + /// map. + /// Return the new LI value. + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + + /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless + /// of the number of defs. + void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI); /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. @@ -306,27 +282,56 @@ class SplitEditor { MachineBasicBlock &MBB, MachineBasicBlock::iterator I); + /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx. + /// Insert PHIDefs as needed to preserve SSA form. + void extendRange(unsigned RegIdx, SlotIndex Idx); + + /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all + /// reaching defs for LI are found. + /// @param LI Live interval whose value is needed. + /// @param MBB Block where LI should be live-in. + /// @param Kill Kill point in MBB. + /// @return Unique value seen, or NULL. + VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB, + SlotIndex Kill); + + /// updateSSA - Compute and insert PHIDefs such that all blocks in + // LiveInBlocks get a known live-in value. Add live ranges to the blocks. + void updateSSA(); + + /// transferValues - Transfer values to the new ranges. + /// Return true if any ranges were skipped. + bool transferValues(); + + /// extendPHIKillRanges - Extend the ranges of all values killed by original + /// parent PHIDefs. + void extendPHIKillRanges(); + /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. - void rewriteAssigned(); + void rewriteAssigned(bool ExtendRanges); - /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq - /// classes in ConEQ. - /// This must be done when Intvs[0] is styill live at all uses, before calling - /// ConEq.Distribute(). - void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, - const ConnectedVNInfoEqClasses &ConEq); + /// deleteRematVictims - Delete defs that are dead after rematerializing. + void deleteRematVictims(); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&, LiveRangeEdit&); + MachineDominatorTree&); - /// getAnalysis - Get the corresponding analysis. - SplitAnalysis &getAnalysis() { return SA; } + /// reset - Prepare for a new split. + void reset(LiveRangeEdit&); /// Create a new virtual register and live interval. - void openIntv(); + /// Return the interval index, starting from 1. Interval index 0 is the + /// implicit complement interval. + unsigned openIntv(); + + /// currentIntv - Return the current interval index. + unsigned currentIntv() const { return OpenIdx; } + + /// selectIntv - Select a previously opened interval index. + void selectIntv(unsigned Idx); /// enterIntvBefore - Enter the open interval before the instruction at Idx. /// If the parent interval is not live before Idx, a COPY is not inserted. @@ -369,25 +374,28 @@ public: /// void overlapIntv(SlotIndex Start, SlotIndex End); - /// closeIntv - Indicate that we are done editing the currently open - /// LiveInterval, and ranges can be trimmed. - void closeIntv(); - /// finish - after all the new live ranges have been created, compute the /// remaining live range, and rewrite instructions to use the new registers. - void finish(); + /// @param LRMap When not null, this vector will map each live range in Edit + /// back to the indices returned by openIntv. + /// There may be extra indices created by dead code elimination. + void finish(SmallVectorImpl<unsigned> *LRMap = 0); /// dump - print the current interval maping to dbgs(). void dump() const; // ===--- High level methods ---=== + /// splitSingleBlock - Split CurLI into a separate live interval around the + /// uses in a single block. This is intended to be used as part of a larger + /// split, and doesn't call finish(). + void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); - - /// splitInsideBlock - Split CurLI into multiple intervals inside MBB. - void splitInsideBlock(const MachineBasicBlock *); }; } + +#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index fcaee4208ba3..f0a44abaf5cd 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -153,7 +153,6 @@ bool StackProtector::InsertStackProtectors() { for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { BasicBlock *BB = I++; - ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RI) continue; @@ -191,8 +190,6 @@ bool StackProtector::InsertStackProtectors() { // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); - if (DT) - FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0; } // For each block with a return instruction, convert this: @@ -219,9 +216,10 @@ bool StackProtector::InsertStackProtectors() { // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); - if (DT) { - DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0); - FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB); + + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; } // Remove default branch instruction to the new BB. @@ -242,7 +240,7 @@ bool StackProtector::InsertStackProtectors() { // statements in the function. if (!FailBB) return false; - if (DT) + if (DT && FailBBDom) DT->addNewBlock(FailBB, FailBBDom); return true; diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ec7829ec39fe..227eb47e6827 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock( } // We now walk the PHIs in successor blocks and check for interferences. This - // is necesary because the use of a PHI's operands are logically contained in + // is necessary because the use of a PHI's operands are logically contained in // the predecessor block. The def of a PHI's destination register is processed // along with the other defs in a basic block. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 15340a3f1084..b9fcd3804d7f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -388,11 +388,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, if (MO.isDef() != (i == 0)) return false; - // For the def, it should be the only def of that register. - if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || - MRI.isLiveIn(Reg))) - return false; - // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would length the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index fa311dc5d66c..6ed91b09966e 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -176,6 +177,52 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { SectionKind::getDataRel()); } +MCSymbol * +TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, + Mangler *Mang, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + switch (Encoding & 0x70) { + default: + report_fatal_error("We do not support this DWARF encoding yet!"); + case dwarf::DW_EH_PE_absptr: + return Mang->getSymbol(GV); + break; + case dwarf::DW_EH_PE_pcrel: { + Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName(); + return getContext().GetOrCreateSymbol(FullName); + break; + } + } +} + +void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { + Twine FullName = StringRef("DW.ref.") + Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(FullName); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + Twine SectionName = StringRef(".data.") + Label->getName(); + SmallString<64> NameData; + SectionName.toVector(NameData); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + const MCSection *Sec = getContext().getELFSection(NameData, + ELF::SHT_PROGBITS, + Flags, + SectionKind::getDataRel(), + 0, Label->getName()); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(8); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + const MCExpr *E = MCConstantExpr::Create(8, getContext()); + Streamer.EmitELFSize(Label, E); + Streamer.EmitLabel(Label); + + unsigned Size = TM.getTargetData()->getPointerSize(); + Streamer.EmitSymbolValue(Sym, Size); +} + static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // FIXME: Why is this here? Codegen is should not be in the business @@ -424,8 +471,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: @@ -446,18 +492,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; + // .comm doesn't support alignment before Leopard. Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); - if (T.getOS() == Triple::Darwin) { - switch (T.getDarwinMajorNumber()) { - case 7: // 10.3 Panther. - case 8: // 10.4 Tiger. - CommDirectiveSupportsAlignment = false; - break; - case 9: // 10.5 Leopard. - case 10: // 10.6 SnowLeopard. - break; - } - } + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) + CommDirectiveSupportsAlignment = false; TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -641,10 +679,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; - unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); + TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. report_fatal_error("Global variable '" + GV->getNameStr() + @@ -654,17 +693,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } - bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES); - if (!TAAWasSet) - TAA = 0; // Sensible default if this is a new section. - // Get the section. const MCSectionMachO *S = getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // If TAA wasn't set by ParseSectionSpecifier() above, // use the value returned by getMachOSection() as a default. - if (!TAAWasSet) + if (!TAAParsed) TAA = S->getTypeAndAttributes(); // Okay, now that we got the section, verify that the TAA & StubSize agree. @@ -806,14 +841,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *TargetLoweringObjectFileMachO:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return SSym; +} + unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; } @@ -822,7 +879,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { return DW_EH_PE_pcrel; } -unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { +unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const { return DW_EH_PE_pcrel; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index b3120b8be1ab..52ea87231ccd 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -105,7 +105,7 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist); - bool isProfitableToConv3Addr(unsigned RegA); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -124,7 +124,11 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, + SmallPtrSet<MachineInstr*, 8> &Processed); + + void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &Processed); void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); @@ -615,16 +619,18 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, /// isProfitableToConv3Addr - Return true if it is profitable to convert the /// given 2-address instruction to a 3-address one. bool -TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) { +TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: // %reg1024<def> = MOV r1 // %reg1025<def> = MOV r0 // %reg1026<def> = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. - unsigned FromRegA = getMappedReg(RegA, SrcRegMap); + unsigned FromRegB = getMappedReg(RegB, SrcRegMap); + if (!FromRegB) + return false; unsigned ToRegA = getMappedReg(RegA, DstRegMap); - return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI)); + return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } /// ConvertInstTo3Addr - Convert the specified two-address instruction into a @@ -664,6 +670,54 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, return false; } +/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// is a copy or a two-address instruction. +void +TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet<MachineInstr*, 8> &Processed) { + SmallVector<unsigned, 4> VirtRegPairs; + bool IsDstPhys; + bool IsCopy = false; + unsigned NewReg = 0; + unsigned Reg = DstReg; + while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, + NewReg, IsDstPhys)) { + if (IsCopy && !Processed.insert(UseMI)) + break; + + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + // Earlier in the same MBB.Reached via a back edge. + break; + + if (IsDstPhys) { + VirtRegPairs.push_back(NewReg); + break; + } + bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; + if (!isNew) + assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + VirtRegPairs.push_back(NewReg); + Reg = NewReg; + } + + if (!VirtRegPairs.empty()) { + unsigned ToReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + while (!VirtRegPairs.empty()) { + unsigned FromReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); + ToReg = FromReg; + } + bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!"); + } +} + /// ProcessCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in @@ -695,49 +749,11 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - SmallVector<unsigned, 4> VirtRegPairs; - bool IsCopy = false; - unsigned NewReg = 0; - while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII, - IsCopy, NewReg, IsDstPhys)) { - if (IsCopy) { - if (!Processed.insert(UseMI)) - break; - } - - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end()) - // Earlier in the same MBB.Reached via a back edge. - break; - - if (IsDstPhys) { - VirtRegPairs.push_back(NewReg); - break; - } - bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second; - if (!isNew) - assert(SrcRegMap[NewReg] == DstReg && - "Can't map to two src physical registers!"); - VirtRegPairs.push_back(NewReg); - DstReg = NewReg; - } - - if (!VirtRegPairs.empty()) { - unsigned ToReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - while (!VirtRegPairs.empty()) { - unsigned FromReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; - if (!isNew) - assert(DstRegMap[FromReg] == ToReg && - "Can't map to two dst physical registers!"); - ToReg = FromReg; - } - } + ScanUses(DstReg, MBB, Processed); } Processed.insert(MI); + return; } /// isSafeToDelete - If the specified instruction does not produce any side @@ -836,7 +852,8 @@ bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + unsigned SrcIdx, unsigned DstIdx, unsigned Dist, + SmallPtrSet<MachineInstr*, 8> &Processed) { const TargetInstrDesc &TID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -887,10 +904,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); + if (TID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. - if (!regBKilled || isProfitableToConv3Addr(regA)) { + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; @@ -951,7 +971,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist); + NewSrcIdx, NewDstIdx, Dist, Processed); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded @@ -1100,7 +1120,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) break; // The tied operands have been eliminated. } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 734b87e62f62..226b78f7bcbd 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -259,7 +259,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getFunction()->getName() << '\n'); - + DEBUG(dump()); + SmallVector<unsigned, 8> SuperDeads; + SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); @@ -283,12 +285,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. - if (MO.isUse() && MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - - // We don't have to deal with sub-register defs because - // LiveIntervalAnalysis already added the necessary <imp-def> - // operands. + if (MO.isUse()) { + if (MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + } else if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); @@ -305,16 +308,28 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { while (!SuperKills.empty()) MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + DEBUG(dbgs() << "> " << *MI); // Finally, remove any identity copies. if (MI->isIdentityCopy()) { - DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); + if (MI->getNumOperands() == 2) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } else { + // Transform identity copy to a KILL to deal with subregisters. + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "Identity copy: " << *MI); + } } } } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ec149dddc1d9..185065880581 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted"); STATISTIC(NumDRM , "Number of re-materializable defs elided"); STATISTIC(NumStores , "Number of stores added"); STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumOmitted , "Number of reloads omitted"); STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); STATISTIC(NumCopified, "Number of available reloads turned into copies"); STATISTIC(NumReMats , "Number of re-materialization"); @@ -261,6 +261,10 @@ public: /// now). void ModifyStackSlotOrReMat(int SlotOrReMat); + /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, + /// other stack slots sharing the same register are no longer valid. + void ClobberSharingStackSlots(int StackSlot); + /// AddAvailableRegsToLiveIn - Availability information is being kept coming /// into the specified MBB. Add available physical registers as potential /// live-in's. If they are reused in the MBB, they will be added to the @@ -665,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, } } -/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. /// static void ReMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MII, @@ -831,6 +835,26 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { PhysRegsAvailable.erase(I); } +void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { + std::map<int, unsigned>::iterator It = + SpillSlotsOrReMatsAvailable.find(StackSlot); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + + // Erase entries in PhysRegsAvailable for other stack slots. + std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg); + while (I != PhysRegsAvailable.end() && I->first == Reg) { + std::multimap<unsigned, int>::iterator NextI = llvm::next(I); + if (I->second != StackSlot) { + DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " + << PrintReg(Reg, TRI) << '\n'); + SpillSlotsOrReMatsAvailable.erase(I->second); + PhysRegsAvailable.erase(I); + } + I = NextI; + } +} + // ************************** // // Reuse Info Implementation // // ************************** // @@ -1791,8 +1815,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); // Reusing a physreg may resurrect it. But we expect ProcessUses to update @@ -1807,8 +1831,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" by copying it into physreg " << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, @@ -2025,7 +2049,8 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, TRI->regsOverlap(MOk.getReg(), PhysReg)) { CanReuse = false; DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg << ": " << MOk << '\n'); + << " for " << PrintReg(VirtReg) << ": " << MOk + << '\n'); break; } } @@ -2039,9 +2064,9 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); + << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) + << " instead of reloading into " + << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -2126,7 +2151,7 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg + << " for " << PrintReg(VirtReg) << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -2315,7 +2340,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { unsigned VirtReg = FoldedVirts[FVI].first; VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); + DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); int SS = VRM->getStackSlot(VirtReg); if (SS == VirtRegMap::NO_STACK_SLOT) @@ -2549,6 +2574,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, } } + // If StackSlot is available in a register that also holds other stack + // slots, clobber those stack slots now. + Spills.ClobberSharingStackSlots(StackSlot); + assert(PhysReg && "VR not assigned a physical register?"); MRI->setPhysRegUsed(PhysReg); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; |