author: Dimitry Andric <dim@FreeBSD.org>  2011-02-20 12:57:14 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2011-02-20 12:57:14 +0000
commit: cf099d11218cb6f6c5cce947d6738e347f07fb12 (patch)
tree: d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/CodeGen
parent: 49011b52fcba02a6051957b84705159f52fae4e4 (diff)
Diffstat (limited to 'lib/CodeGen')
139 files changed, 17732 insertions, 11835 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5a634d6ccb01..b520d8fcedc0 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -155,16 +155,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // In a return block, examine the function live-out regs. for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), E = MRI.liveout_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; } } } @@ -176,16 +171,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; } } @@ -197,12 +187,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) { State->UnionGroups(AliasReg, 0); KillIndices[AliasReg] = BB->size(); DefIndices[AliasReg] = ~0u; @@ -435,12 +421,9 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, continue; // Update def for Reg and aliases. - DefIndices[Reg] = Count; - for (const unsigned *Alias = TRI->getAliasSet(Reg); - *Alias; ++Alias) { - unsigned AliasReg = *Alias; + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) DefIndices[AliasReg] = Count; - } } } diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp new file mode 100644 index 000000000000..20c7625f3253 --- /dev/null +++ b/lib/CodeGen/AllocationOrder.cpp @@ -0,0 +1,68 @@ +//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. 
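Aside (not part of the commit): the AggressiveAntiDepBreaker hunks above fold the old "update Reg, then repeat for every alias" pattern into a single loop over TRI->getOverlaps(Reg), a zero-terminated list that includes the register itself. A toy stand-in showing just the loop shape, with hypothetical register numbers and helper names:

```cpp
#include <cstdio>

// Zero-terminated overlap list containing the register itself plus its
// aliases (values here are made up, not real target register numbers).
static const unsigned OverlapsOfEAX[] = {/*EAX*/ 1, /*AX*/ 2, /*AL*/ 3,
                                         /*AH*/ 4, 0};

static void markLiveOut(const unsigned *Overlaps, unsigned KillIdx,
                        unsigned *KillIndices, unsigned *DefIndices) {
  // One loop now covers the register and all of its aliases.
  for (const unsigned *I = Overlaps; unsigned Reg = *I; ++I) {
    KillIndices[Reg] = KillIdx;
    DefIndices[Reg] = ~0u;
  }
}

int main() {
  unsigned Kill[8] = {0}, Def[8] = {0};
  markLiveOut(OverlapsOfEAX, 42, Kill, Def);
  std::printf("%u %u\n", Kill[1], Kill[4]); // 42 42
  return 0;
}
```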
The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#include "AllocationOrder.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +// Compare VirtRegMap::getRegAllocPref(). +AllocationOrder::AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const BitVector &ReservedRegs) + : Pos(0), Reserved(ReservedRegs) { + const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); + std::pair<unsigned, unsigned> HintPair = + VRM.getRegInfo().getRegAllocationHint(VirtReg); + + // HintPair.second is a register, phys or virt. + Hint = HintPair.second; + + // Translate to physreg, or 0 if not assigned yet. + if (TargetRegisterInfo::isVirtualRegister(Hint)) + Hint = VRM.getPhys(Hint); + + // The remaining allocation order may depend on the hint. + tie(Begin, End) = VRM.getTargetRegInfo() + .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction()); + + // Target-dependent hints require resolution. + if (HintPair.first) + Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint, + VRM.getMachineFunction()); + + // The hint must be a valid physreg for allocation. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || ReservedRegs.test(Hint))) + Hint = 0; +} + +unsigned AllocationOrder::next() { + // First take the hint. + if (!Pos) { + Pos = Begin; + if (Hint) + return Hint; + } + // Then look at the order from TRI. + while(Pos != End) { + unsigned Reg = *Pos++; + if (Reg != Hint && !Reserved.test(Reg)) + return Reg; + } + return 0; +} diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h new file mode 100644 index 000000000000..3db4b6925fca --- /dev/null +++ b/lib/CodeGen/AllocationOrder.h @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H +#define LLVM_CODEGEN_ALLOCATIONORDER_H + +namespace llvm { + +class BitVector; +class VirtRegMap; + +class AllocationOrder { + const unsigned *Begin; + const unsigned *End; + const unsigned *Pos; + const BitVector &Reserved; + unsigned Hint; +public: + + /// AllocationOrder - Create a new AllocationOrder for VirtReg. + /// @param VirtReg Virtual register to allocate for. + /// @param VRM Virtual register map for function. + /// @param ReservedRegs Set of reserved registers as returned by + /// TargetRegisterInfo::getReservedRegs(). + AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const BitVector &ReservedRegs); + + /// next - Return the next physical register in the allocation order, or 0. + /// It is safe to call next again after it returned 0. + /// It will keep returning 0 until rewind() is called. + unsigned next(); + + /// rewind - Start over from the beginning. 
+ void rewind() { Pos = 0; } + +}; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index e3dd646c952e..36638c36de67 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -19,6 +19,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -30,7 +31,7 @@ using namespace llvm; /// of insertvalue or extractvalue indices that identify a member, return /// the linearized index of the start of the member. /// -unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, +unsigned llvm::ComputeLinearIndex(const Type *Ty, const unsigned *Indices, const unsigned *IndicesEnd, unsigned CurIndex) { @@ -45,8 +46,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, EE = STy->element_end(); EI != EE; ++EI) { if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex); } return CurIndex; } @@ -55,8 +56,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, const Type *EltTy = ATy->getElementType(); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex); } return CurIndex; } @@ -125,7 +126,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) { /// hasInlineAsmMemConstraint - Return true if the inline asm instruction being /// processed uses a memory 'm' constraint. bool -llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos, +llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, const TargetLowering &TLI) { for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { InlineAsm::ConstraintInfo &CI = CInfos[i]; @@ -283,3 +284,20 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, return true; } +bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + const TargetLowering &TLI) { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if (CallerRetAttr & ~Attribute::NoAlias) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Check if the only use is a function return node. 
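Aside (not part of the commit): the new AllocationOrder class above tries the hint register first, then walks the target's allocation order while skipping reserved registers and the hint, and next() keeps returning 0 after the order is exhausted until rewind() is called. A minimal standalone sketch of that contract using plain containers (all names here are hypothetical stand-ins, not the LLVM API):

```cpp
#include <cstdio>
#include <set>
#include <vector>

class SimpleAllocationOrder {
  std::vector<unsigned> Order;  // target-provided allocation order
  std::set<unsigned> Reserved;  // reserved physical registers
  unsigned Hint;                // preferred register, 0 if none
  size_t Pos;                   // 0 = "hint not taken yet"
public:
  SimpleAllocationOrder(std::vector<unsigned> O, std::set<unsigned> R,
                        unsigned H)
      : Order(std::move(O)), Reserved(std::move(R)), Hint(H), Pos(0) {}

  // Return the next candidate register, or 0 when exhausted.
  unsigned next() {
    if (Pos == 0) {
      Pos = 1;                             // from now on index Order[Pos-1]
      if (Hint && !Reserved.count(Hint))
        return Hint;                       // try the hint first
    }
    while (Pos <= Order.size()) {
      unsigned Reg = Order[Pos - 1];
      ++Pos;
      if (Reg != Hint && !Reserved.count(Reg))
        return Reg;
    }
    return 0;                              // stays 0 until rewind()
  }

  void rewind() { Pos = 0; }               // start over from the beginning
};

int main() {
  SimpleAllocationOrder AO({1, 2, 3, 4}, /*Reserved=*/{3}, /*Hint=*/2);
  for (unsigned R = AO.next(); R; R = AO.next())
    std::printf("candidate r%u\n", R);     // prints r2, r1, r4
  return 0;
}
```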
+ return TLI.isUsedByReturnOnly(Node); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d358ab20ffc5..43e8990a9da1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -38,6 +38,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/ErrorHandling.h" @@ -178,16 +179,24 @@ bool AsmPrinter::doInitialization(Module &M) { if (!M.getModuleInlineAsm().empty()) { OutStreamer.AddComment("Start of file scope inline assembly"); OutStreamer.AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/); + EmitInlineAsm(M.getModuleInlineAsm()+"\n"); OutStreamer.AddComment("End of file scope inline assembly"); OutStreamer.AddBlankLine(); } if (MAI->doesSupportDebugInformation()) DD = new DwarfDebug(this, &M); - + if (MAI->doesSupportExceptionHandling()) - DE = new DwarfException(this); + switch (MAI->getExceptionHandlingType()) { + default: + case ExceptionHandling::DwarfTable: + DE = new DwarfTableException(this); + break; + case ExceptionHandling::DwarfCFI: + DE = new DwarfCFIException(this); + break; + } return false; } @@ -282,8 +291,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common symbols. if (GVKind.isCommon()) { + unsigned Align = 1 << AlignLog; + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; + // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } @@ -301,11 +314,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitLocalCommonSymbol(GVSym, Size); return; } + + unsigned Align = 1 << AlignLog; + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; // .local _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 - OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog); + OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } @@ -327,6 +344,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. + // + // TODO: This should become an "emit thread local global" method on TLOF. + // All of this macho specific stuff should be sunk down into TLOFMachO and + // stuff like "TLSExtraDataSection" should no longer be part of the parent + // TLOF class. This will also make it more obvious that stuff like + // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho + // specific code. 
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = @@ -623,7 +647,7 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->beginScope(II); + DD->beginInstruction(II); } if (isVerbose()) @@ -657,7 +681,7 @@ void AsmPrinter::EmitFunctionBody() { if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - DD->endScope(II); + DD->endInstruction(II); } } } @@ -729,7 +753,20 @@ bool AsmPrinter::doFinalization(Module &M) { for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobalVariable(I); - + + // Emit visibility info for declarations + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const Function &F = *I; + if (!F.isDeclaration()) + continue; + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + MCSymbol *Name = Mang->getSymbol(&F); + EmitVisibility(Name, V); + } + // Finalize debug and EH information. if (DE) { { @@ -905,14 +942,6 @@ void AsmPrinter::EmitConstantPool() { const Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); - - // Emit the label with a comment on it. - if (isVerbose()) { - OutStreamer.GetCommentOS() << "constant pool "; - WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(), - MF->getFunction()->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } OutStreamer.EmitLabel(GetCPISymbol(CPI)); if (CPE.isMachineConstantPoolEntry()) @@ -983,7 +1012,7 @@ void AsmPrinter::EmitJumpTableInfo() { } } - // On some targets (e.g. Darwin) we want to emit two consequtive labels + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. @@ -1004,6 +1033,7 @@ void AsmPrinter::EmitJumpTableInfo() { void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned UID) const { + assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); const MCExpr *Value = 0; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ce4519c541e3..98a1bf2f1ce4 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -19,7 +19,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -36,9 +36,8 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) { - // FIXME: MCize. 
- OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value)); + if (MAI->hasLEB128()) { + OutStreamer.EmitSLEB128IntValue(Value); return; } @@ -60,10 +59,10 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) { - // FIXME: MCize. - OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value)); + + // FIXME: Should we add a PadTo option to the streamer? + if (MAI->hasLEB128() && PadTo == 0) { + OutStreamer.EmitULEB128IntValue(Value); return; } @@ -157,7 +156,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const MCExpr *Exp = TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); + OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ @@ -215,8 +214,8 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, const TargetRegisterInfo *RI = TM.getRegisterInfo(); int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameInfo()->getStackGrowthDirection() != - TargetFrameInfo::StackGrowsUp) + if (TM.getFrameLowering()->getStackGrowthDirection() != + TargetFrameLowering::StackGrowsUp) stackGrowth *= -1; for (unsigned i = 0, N = Moves.size(); i < N; ++i) { @@ -277,3 +276,43 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves, } } } + +/// EmitFrameMoves - Emit frame instructions to describe the layout of the +/// frame. +void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + int stackGrowth = TM.getTargetData()->getPointerSize(); + if (TM.getFrameLowering()->getStackGrowthDirection() != + TargetFrameLowering::StackGrowsUp) + stackGrowth *= -1; + + for (unsigned i = 0, N = Moves.size(); i < N; ++i) { + const MachineMove &Move = Moves[i]; + MCSymbol *Label = Move.getLabel(); + // Throw out move if the label is invalid. + if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. 
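Aside (not part of the commit): the EmitSLEB128/EmitULEB128 hunks above stop printing raw `.sleb128`/`.uleb128` text and let the streamer encode the values instead. For reference, a minimal standalone encoder for both LEB128 forms (hypothetical helper names, not the MCStreamer API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Unsigned LEB128: 7 bits per byte, high bit set on all but the last byte.
static std::vector<uint8_t> encodeULEB128(uint64_t V) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V) Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
  return Out;
}

// Signed LEB128: stop once the remaining bits are all copies of the sign bit.
static std::vector<uint8_t> encodeSLEB128(int64_t V) {
  std::vector<uint8_t> Out;
  bool More = true;
  while (More) {
    uint8_t Byte = V & 0x7f;
    V >>= 7;                               // arithmetic shift on int64_t
    if ((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)))
      More = false;
    else
      Byte |= 0x80;
    Out.push_back(Byte);
  }
  return Out;
}

int main() {
  for (uint8_t B : encodeULEB128(624485)) std::printf("%02x ", B); // e5 8e 26
  std::printf("\n");
  for (uint8_t B : encodeSLEB128(-2)) std::printf("%02x ", B);     // 7e
  std::printf("\n");
  return 0;
}
```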
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + assert("Machine move not supported yet"); + // Reg + Offset + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index df0316814c08..c6166e2365a5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -34,15 +34,47 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace { + struct SrcMgrDiagInfo { + const MDNode *LocInfo; + LLVMContext::InlineAsmDiagHandlerTy DiagHandler; + void *DiagContext; + }; +} + +/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo +/// struct above. +static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { + SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); + assert(DiagInfo && "Diagnostic context not passed down?"); + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (const MDNode *LocInfo = DiagInfo->LocInfo) { + unsigned ErrorLine = Diag.getLineNo()-1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); +} + /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. -void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { +void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const { assert(!Str.empty() && "Can't emit empty inline asm block"); - + // Remember if the buffer is nul terminated or not so we can avoid a copy. bool isNullTerminated = Str.back() == 0; if (isNullTerminated) Str = Str.substr(0, Str.size()-1); - + // If the output streamer is actually a .s file, just emit the blob textually. // This is useful in case the asm parser doesn't handle something but the // system assembler does. @@ -50,18 +82,23 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { OutStreamer.EmitRawText(Str); return; } - + SourceMgr SrcMgr; - + SrcMgrDiagInfo DiagInfo; + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; - if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) { - SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler, - LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie); + if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { + // If the source manager has an issue, we arrange for SrcMgrDiagHandler + // to be invoked, getting DiagInfo passed into it. 
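Aside (not part of the commit): SrcMgrDiagHandler above maps an assembler diagnostic back to a front-end location cookie by indexing the !srcloc metadata with the diagnostic's line number, clamping out-of-range lines to the first entry. A standalone sketch of that lookup with plain containers (names hypothetical):

```cpp
#include <cstdio>
#include <vector>

// The !srcloc metadata carries one location cookie per line of the inline
// asm blob; diagnostics from the integrated assembler are 1-based.
static unsigned cookieForDiagnostic(const std::vector<unsigned> &LocCookies,
                                    unsigned DiagLineNo) {
  if (LocCookies.empty())
    return 0;                              // no metadata: no cookie
  unsigned ErrorLine = DiagLineNo - 1;     // convert to a 0-based index
  if (ErrorLine >= LocCookies.size())
    ErrorLine = 0;                         // clamp to the first entry
  return LocCookies[ErrorLine];
}

int main() {
  std::vector<unsigned> Cookies = {101, 102, 103};        // per asm line
  std::printf("%u\n", cookieForDiagnostic(Cookies, 2));   // 102
  std::printf("%u\n", cookieForDiagnostic(Cookies, 9));   // out of range: 101
  return 0;
}
```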
+ DiagInfo.LocInfo = LocMDNode; + DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); + SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo); HasDiagHandler = true; } - + MemoryBuffer *Buffer; if (isNullTerminated) Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>"); @@ -70,7 +107,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - + OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI)); @@ -92,15 +129,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { /// instruction that is an inline asm. void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); - + unsigned NumOperands = MI->getNumOperands(); - + // Count the number of register definitions to find the asm string. unsigned NumDefs = 0; for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); ++NumDefs) assert(NumDefs != NumOperands-2 && "No asm string?"); - + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. @@ -128,22 +165,23 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. unsigned LocCookie = 0; + const MDNode *LocMD = 0; for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata()) - if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata()) - if (SrcLoc->getNumOperands() != 0) - if (const ConstantInt *CI = - dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) { - LocCookie = CI->getZExtValue(); - break; - } + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } } - + // Emit the inline asm to a temporary string so we can emit it through // EmitInlineAsm. SmallString<256> StringData; raw_svector_ostream OS(StringData); - + OS << '\t'; // The variant of the current asmprinter. @@ -151,7 +189,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. - + while (*LastEmitted) { switch (*LastEmitted) { default: { @@ -199,18 +237,18 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { ++LastEmitted; // consume ')' character. if (CurVariant == -1) OS << '}'; // this is gcc's behavior for } outside a variant - else + else CurVariant = -1; break; } if (Done) break; - + bool HasCurlyBraces = false; if (*LastEmitted == '{') { // ${variable} ++LastEmitted; // Consume '{' character. HasCurlyBraces = true; } - + // If we have ${:foo}, then this is not a real operand reference, it is a // "magic" string reference, just like in .td files. Arrange to call // PrintSpecial. 
@@ -221,25 +259,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (StrEnd == 0) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); - + std::string Val(StrStart, StrEnd); PrintSpecial(MI, OS, Val.c_str()); LastEmitted = StrEnd+1; break; } - + const char *IDStart = LastEmitted; const char *IDEnd = IDStart; - while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; - + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + unsigned Val; if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) report_fatal_error("Bad $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); LastEmitted = IDEnd; - + char Modifier[2] = { 0, 0 }; - + if (HasCurlyBraces) { // If we have curly braces, check for a modifier character. This // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. @@ -248,25 +286,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (*LastEmitted == 0) report_fatal_error("Bad ${:} expression in inline asm string: '" + Twine(AsmStr) + "'"); - + Modifier[0] = *LastEmitted; ++LastEmitted; // Consume modifier character. } - + if (*LastEmitted != '}') report_fatal_error("Bad ${} expression in inline asm string: '" + Twine(AsmStr) + "'"); ++LastEmitted; // Consume '}' character. } - + if (Val >= NumOperands-1) report_fatal_error("Invalid $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); - + // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 2; + unsigned OpNo = InlineAsm::MIOp_FirstOperand; bool Error = false; @@ -310,8 +348,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { } } OS << '\n' << (char)0; // null terminate string. - EmitInlineAsm(OS.str(), LocCookie); - + EmitInlineAsm(OS.str(), LocMD); + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't // enabled, so we use EmitRawText. if (OutStreamer.hasRawTextSupport()) @@ -335,7 +373,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, } else if (!strcmp(Code, "uid")) { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. - + // If this is a new LastFn instruction, bump the counter. 
if (LastMI != MI || LastFn != getFunctionNumber()) { ++Counter; @@ -349,7 +387,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; report_fatal_error(Msg.str()); - } + } } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index ca8b8436c11f..306efade7d92 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -3,9 +3,10 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DwarfCFIException.cpp DwarfDebug.cpp DwarfException.cpp + DwarfTableException.cpp OcamlGCPrinter.cpp ) -target_link_libraries (LLVMAsmPrinter LLVMMCParser) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp new file mode 100644 index 000000000000..68be2eed8f0e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -0,0 +1,138 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +DwarfCFIException::DwarfCFIException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + {} + +DwarfCFIException::~DwarfCFIException() {} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfCFIException::EndModule() { + if (!Asm->MAI->isExceptionHandlingDwarf()) + return; + + if (!shouldEmitTableModule) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + // Begin eh frame section. 
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + // Emit references to all used personality functions + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (size_t i = 0, e = Personalities.size(); i != e; ++i) { + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); + Asm->EmitReference(Personalities[i], PerEncoding); + } +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfCFIException::BeginFunction(const MachineFunction *MF) { + shouldEmitTable = shouldEmitMoves = false; + + // If any landing pads survive, we need an EH table. + shouldEmitTable = !MMI->getLandingPads().empty(); + + // See if we need frame move info. + shouldEmitMoves = + !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + shouldEmitTableModule |= shouldEmitTable; + + if (shouldEmitMoves) { + const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); + Asm->OutStreamer.EmitCFIStartProc(); + + // Indicate locations of general callee saved registers in frame. + std::vector<MachineMove> Moves; + TFL->getInitialFrameState(Moves); + Asm->EmitCFIFrameMoves(Moves); + Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); + } + + if (!shouldEmitTable) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Provide LSDA information. + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + if (LSDAEncoding != dwarf::DW_EH_PE_omit) + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); + + // Indicate personality routine, if any. + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + if (PerEncoding != dwarf::DW_EH_PE_omit && + MMI->getPersonalities()[MMI->getPersonalityIndex()]) + Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", + MMI->getPersonalityIndex()), + PerEncoding); +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfCFIException::EndFunction() { + if (!shouldEmitMoves && !shouldEmitTable) return; + + if (shouldEmitMoves) + Asm->OutStreamer.EmitCFIEndProc(); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Map all labels and get rid of any dead landing pads. 
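Aside (not part of the commit): BeginFunction above makes two independent decisions, whether the function needs an exception table (any surviving landing pads) and whether it needs frame-move/CFI information (it may throw, or unwind tables are mandatory). A small sketch of just that decision logic (types and names hypothetical):

```cpp
#include <cstdio>

struct FunctionEHInfo {
  bool HasLandingPads;         // any landing pads survived codegen?
  bool MayThrow;               // i.e. !doesNotThrow()
  bool UnwindTablesMandatory;  // -funwind-tables style requirement
};

// Mirrors the shouldEmitTable / shouldEmitMoves split in BeginFunction.
static void planEH(const FunctionEHInfo &F, bool &EmitTable, bool &EmitMoves) {
  EmitTable = F.HasLandingPads;                       // needs an LSDA
  EmitMoves = F.MayThrow || F.UnwindTablesMandatory;  // needs CFI directives
}

int main() {
  bool Table, Moves;
  planEH({false, true, false}, Table, Moves);
  std::printf("table=%d moves=%d\n", Table, Moves);   // table=0 moves=1
  return 0;
}
```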
+ MMI->TidyLandingPads(); + + if (shouldEmitTable) + EmitExceptionTable(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c886a5ecc615..5106d5778c29 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -16,6 +16,7 @@ #include "DIE.h" #include "llvm/Constants.h" #include "llvm/Module.h" +#include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -24,12 +25,13 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -38,7 +40,7 @@ #include "llvm/Support/ValueHandle.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Timer.h" -#include "llvm/System/Path.h" +#include "llvm/Support/Path.h" using namespace llvm; static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden, @@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absense of debug location information explicit."), cl::init(false)); +#ifndef NDEBUG +STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number"); +#endif + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -507,8 +513,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { return; unsigned Line = V.getLineNumber(); - unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(), - V.getContext().getFilename()); + if (Line == 0) + return; + unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -522,8 +529,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { return; unsigned Line = G.getLineNumber(); - unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(), - G.getContext().getFilename()); + if (Line == 0) + return; + unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -542,8 +550,7 @@ void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { unsigned Line = SP.getLineNumber(); if (!SP.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(SP.getDirectory(), - SP.getFilename()); + unsigned FileID = GetOrCreateSourceID(SP.getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -557,10 +564,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (!Ty.getContext().Verify()) + if (Line == 0 || !Ty.getContext().Verify()) return; - unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(), - Ty.getContext().getFilename()); + unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, 
FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -574,10 +580,11 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { return; unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; StringRef FN = NS.getFilename(); - StringRef Dir = NS.getDirectory(); - unsigned FileID = GetOrCreateSourceID(Dir, FN); + unsigned FileID = GetOrCreateSourceID(FN); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -588,8 +595,8 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { MachineLocation Location; unsigned FrameReg; - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); Location.set(FrameReg, Offset); if (DV->variableHasComplexAddress()) @@ -620,8 +627,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, if (Reg < 32) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); } else { - Reg = Reg - dwarf::DW_OP_reg0; - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { @@ -760,8 +766,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, if (Reg < 32) addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); else { - Reg = Reg - dwarf::DW_OP_reg0; - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { @@ -812,6 +817,15 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + if (RI->getFrameRegister(*Asm->MF) == Location.getReg() + && Location.getOffset()) { + // If variable offset is based in frame register then use fbreg. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + addBlock(Die, Attribute, 0, Block); + return; + } + if (Location.isReg()) { if (Reg < 32) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); @@ -834,35 +848,28 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, } /// addRegisterAddress - Add register location entry in variable DIE. -bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { assert (MO.isReg() && "Invalid machine operand!"); if (!MO.getReg()) return false; MachineLocation Location; Location.set(MO.getReg()); addAddress(Die, dwarf::DW_AT_location, Location); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantValue - Add constant value entry in variable DIE. 
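Aside (not part of the commit): the addComplexAddress/addBlockByrefAddress hunks above fix register locations for DWARF register numbers of 32 and above: DW_OP_reg0 through DW_OP_reg31 encode the register in the opcode itself, while larger numbers need DW_OP_regx followed by a ULEB128 operand. A hedged sketch of that choice (opcode values per the DWARF spec; helper names hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// DWARF expression opcodes (DWARF v3/v4 values).
enum : uint8_t {
  DW_OP_reg0 = 0x50,   // ... DW_OP_reg31 = 0x6f
  DW_OP_regx = 0x90    // followed by a ULEB128 register number
};

// Append a "value lives in register Reg" location expression to Expr.
static void emitRegLocation(std::vector<uint8_t> &Expr, unsigned Reg) {
  if (Reg < 32) {
    Expr.push_back(DW_OP_reg0 + Reg);      // compact single-byte form
    return;
  }
  Expr.push_back(DW_OP_regx);
  do {                                     // ULEB128 operand, inlined
    uint8_t Byte = Reg & 0x7f;
    Reg >>= 7;
    if (Reg) Byte |= 0x80;
    Expr.push_back(Byte);
  } while (Reg);
}

int main() {
  std::vector<uint8_t> A, B;
  emitRegLocation(A, 5);    // -> 0x55 (DW_OP_reg5)
  emitRegLocation(B, 33);   // -> 0x90 0x21 (DW_OP_regx, 33)
  std::printf("%02x | %02x %02x\n", A[0], B[0], B[1]);
  return 0;
}
```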
-bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { assert (MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); unsigned Imm = MO.getImm(); addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } /// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, - const MachineOperand &MO) { +bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -883,11 +890,42 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS, (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - if (VS) - addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS); return true; } +/// addConstantValue - Add constant value entry in variable DIE. +bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + if (CI->getBitWidth() <= 64) { + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} /// addToContextOwner - Add Die into the list of its context owner's children. void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { @@ -898,8 +936,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), - /*MakeDecl=*/false); + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); @@ -1033,16 +1070,23 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIDescriptor RTy = Elements.getElement(0); addType(&Buffer, DIType(RTy)); - // Add prototype flag. - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - + bool isPrototyped = true; // Add arguments. 
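Aside (not part of the commit): the new addConstantValue(ConstantInt*) overload above emits integers wider than 64 bits as a block of raw bytes, walking the value forward or backward depending on the target's byte order. A standalone sketch of that forward/backward walk, taking the value as least-significant-first 64-bit words (all names hypothetical):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Serialize a multi-word integer (least-significant word first) to bytes in
// the target's byte order.
static std::vector<uint8_t> serializeWords(const std::vector<uint64_t> &Words,
                                           bool TargetLittleEndian) {
  // Flatten to little-endian bytes first.
  std::vector<uint8_t> LE;
  for (uint64_t W : Words)
    for (int i = 0; i < 8; ++i)
      LE.push_back(uint8_t(W >> (8 * i)));

  // Walk forward for little-endian targets, backward for big-endian ones,
  // mirroring the Start/Stop/Incr loop in the patch.
  std::vector<uint8_t> Out;
  int NumBytes = int(LE.size());
  int Incr  = TargetLittleEndian ? 1 : -1;
  int Start = TargetLittleEndian ? 0 : NumBytes - 1;
  int Stop  = TargetLittleEndian ? NumBytes : -1;
  for (; Start != Stop; Start += Incr)
    Out.push_back(LE[Start]);
  return Out;
}

int main() {
  // A 128-bit value: low word 0x08090a0b0c0d0e0f, high word 0x0001020304050607.
  std::vector<uint64_t> Words = {0x08090a0b0c0d0e0fULL, 0x0001020304050607ULL};
  for (uint8_t B : serializeWords(Words, /*TargetLittleEndian=*/false))
    std::printf("%02x", B);   // 000102030405060708090a0b0c0d0e0f
  std::printf("\n");
  return 0;
}
```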
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); } break; case dwarf::DW_TAG_structure_type: @@ -1060,8 +1104,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { for (unsigned i = 0; i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; - if (Element.isSubprogram()) + if (Element.isSubprogram()) { + DISubprogram SP(Element); ElemDie = createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); @@ -1094,6 +1151,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIDescriptor Context = CTy.getContext(); addToContextOwner(&Buffer, Context); } + + if (Tag == dwarf::DW_TAG_class_type) { + DIArray TParams = CTy.getTemplateParams(); + unsigned N = TParams.getNumElements(); + // Add template parameters. + for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + } break; } default: @@ -1124,6 +1196,38 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + CompileUnit *TypeCU = getCompileUnit(TP); + DIE *ParamDIE = TypeCU->getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. +DIE * +DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { + CompileUnit *TVCU = getCompileUnit(TPV); + DIE *ParamDIE = TVCU->getDIE(TPV); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); + addType(ParamDIE, TPV.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + TPV.getValue()); + return ParamDIE; +} + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. 
void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t L = SR.getLo(); @@ -1258,7 +1362,8 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { else if (DT.isPrivate()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_private); - else if (DT.getTag() == dwarf::DW_TAG_inheritance) + // Otherwise C++ member and base classes are considered public. + else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_public); if (DT.isVirtual()) @@ -1268,7 +1373,7 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { } /// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { +DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); DIE *SPDie = SPCU->getDIE(SP); if (SPDie) @@ -1286,10 +1391,7 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { addSourceLine(SPDie, SP); - // Add prototyped tag, if C or ObjC. - unsigned Lang = SP.getCompileUnit().getLanguage(); - if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || - Lang == dwarf::DW_LANG_ObjC) + if (SP.isPrototyped()) addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. @@ -1307,13 +1409,13 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); + addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } - if (MakeDecl || !SP.isDefinition()) { + if (!SP.isDefinition()) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. Do not add arguments for subprogram definition. 
They will @@ -1603,6 +1705,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + else if (DIVariable(DV->getVariable()).isArtificial()) + addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1625,7 +1729,6 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { DbgVariableToDbgInstMap.find(DV); if (DVI != DbgVariableToDbgInstMap.end()) { const MachineInstr *DVInsn = DVI->second; - const MCSymbol *DVLabel = findVariableLabel(DV); bool updated = false; // FIXME : Handle getNumOperands != 3 if (DVInsn->getNumOperands() == 3) { @@ -1637,20 +1740,17 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); updated = true; } else - updated = addRegisterAddress(VariableDie, DVLabel, RegOp); + updated = addRegisterAddress(VariableDie, RegOp); } else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) updated = - addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); } else { MachineLocation Location = Asm->getDebugValueLocation(DVInsn); if (Location.getReg()) { addAddress(VariableDie, dwarf::DW_AT_location, Location); - if (DVLabel) - addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, - DVLabel); updated = true; } } @@ -1700,6 +1800,16 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; + SmallVector <DIE *, 8> Children; + // Collect lexical scope childrens first. + const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) + if (DIE *Variable = constructVariableDIE(Variables[i], Scope)) + Children.push_back(Variable); + const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); + for (unsigned j = 0, M = Scopes.size(); j < M; ++j) + if (DIE *Nested = constructScopeDIE(Scopes[j])) + Children.push_back(Nested); DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) @@ -1715,26 +1825,19 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { else ScopeDIE = updateSubprogramScopeDIE(DS); } - else + else { + // There is no need to emit empty lexical block DIE. + if (Children.empty()) + return NULL; ScopeDIE = constructLexicalScopeDIE(Scope); - if (!ScopeDIE) return NULL; - - // Add variables to scope. - const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables(); - for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); - if (VariableDIE) - ScopeDIE->addChild(VariableDIE); } + + if (!ScopeDIE) return NULL; - // Add nested scopes. - const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes(); - for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { - // Define the Scope debug information entry. 
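Aside (not part of the commit): constructScopeDIE above now builds all variable and nested-scope children before creating the scope DIE itself, so a lexical block that ends up with no children can simply be dropped. A standalone sketch of that "collect children first, skip empty blocks" shape with toy types (all names hypothetical):

```cpp
#include <memory>
#include <string>
#include <vector>

struct DIE {
  std::string Tag;
  std::vector<std::unique_ptr<DIE>> Children;
};

struct ScopeNode {
  bool IsLexicalBlock;                  // false for the subprogram scope
  std::vector<std::string> Variables;   // variables declared in this scope
  std::vector<ScopeNode> Children;      // nested scopes
};

static std::unique_ptr<DIE> makeDIE(const std::string &Tag) {
  auto D = std::make_unique<DIE>();
  D->Tag = Tag;
  return D;
}

// Build all children first; a lexical block that ends up empty is dropped.
static std::unique_ptr<DIE> constructScope(const ScopeNode &S) {
  std::vector<std::unique_ptr<DIE>> Kids;
  for (const std::string &V : S.Variables)
    Kids.push_back(makeDIE("DW_TAG_variable " + V));
  for (const ScopeNode &C : S.Children)
    if (auto Nested = constructScope(C))
      Kids.push_back(std::move(Nested));

  if (S.IsLexicalBlock && Kids.empty())
    return nullptr;                     // no need to emit an empty block

  auto Die = makeDIE(S.IsLexicalBlock ? "DW_TAG_lexical_block"
                                      : "DW_TAG_subprogram");
  Die->Children = std::move(Kids);
  return Die;
}

int main() {
  // A function with one variable and one empty nested block.
  ScopeNode Fn{false, {"x"}, {ScopeNode{true, {}, {}}}};
  auto Die = constructScope(Fn);
  // The empty lexical block was dropped: only the variable child remains.
  return (Die && Die->Children.size() == 1) ? 0 : 1;
}
```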
- DIE *NestedDIE = constructScopeDIE(Scopes[j]); - if (NestedDIE) - ScopeDIE->addChild(NestedDIE); - } + // Add children + for (SmallVector<DIE *, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + ScopeDIE->addChild(*I); if (DS.isSubprogram()) addPubTypes(DISubprogram(DS)); @@ -1746,37 +1849,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){ - unsigned DId; - assert (DirName.empty() == false && "Invalid directory name!"); - StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName); - if (DI != DirectoryIdMap.end()) { - DId = DI->getValue(); - } else { - DId = DirectoryNames.size() + 1; - DirectoryIdMap[DirName] = DId; - DirectoryNames.push_back(DirName); - } +unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>"); - unsigned FId; - StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName); - if (FI != SourceFileIdMap.end()) { - FId = FI->getValue(); - } else { - FId = SourceFileNames.size() + 1; - SourceFileIdMap[FileName] = FId; - SourceFileNames.push_back(FileName); - } + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); - DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI = - SourceIdMap.find(std::make_pair(DId, FId)); - if (SI != SourceIdMap.end()) - return SI->second; + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); - unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0. - SourceIdMap[std::make_pair(DId, FId)] = SrcId; - SourceIds.push_back(std::make_pair(DId, FId)); + // Print out a .file directive to specify files for .loc directives. + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName); return SrcId; } @@ -1802,7 +1889,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(Dir, FN); + unsigned ID = GetOrCreateSourceID(FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, @@ -1886,6 +1973,32 @@ static bool isUnsignedDIType(DIType Ty) { return false; } +// Return const exprssion if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return NULL; + + // First operand points to a global value. + if (!isa<GlobalValue>(CE->getOperand(0))) + return NULL; + + // Second operand is zero. + const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return NULL; + + // Third operand is offset. + if (!isa<ConstantInt>(CE->getOperand(2))) + return NULL; + + return CE; +} + /// constructGlobalVariableDIE - Construct global variable DIE. 
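Aside (not part of the commit): the GetOrCreateSourceID rewrite above collapses the old directory/file pair bookkeeping into a single filename-keyed map whose IDs start at 1 (DW_AT_decl_file 0 means "no file"), emitting a `.file` directive the first time a name is seen and treating an empty name as stdin. A standalone sketch of that interning scheme with std::map as a stand-in (names hypothetical):

```cpp
#include <cstdio>
#include <map>
#include <string>

class SourceIdTable {
  std::map<std::string, unsigned> Ids;      // filename -> 1-based id
public:
  unsigned getOrCreate(std::string FileName) {
    if (FileName.empty())
      FileName = "<stdin>";                 // FE gave no name: assume stdin
    auto It = Ids.find(FileName);
    if (It != Ids.end())
      return It->second;                    // already assigned
    unsigned Id = unsigned(Ids.size()) + 1; // ids start at 1, never 0
    Ids.emplace(FileName, Id);
    // First time this file is seen: announce it to the assembler.
    std::printf("\t.file\t%u \"%s\"\n", Id, FileName.c_str());
    return Id;
  }
};

int main() {
  SourceIdTable T;
  T.getOrCreate("a.c");   // emits .file 1 "a.c"
  T.getOrCreate("b.c");   // emits .file 2 "b.c"
  T.getOrCreate("a.c");   // reuses id 1, no directive
  T.getOrCreate("");      // treated as "<stdin>", id 3
  return 0;
}
```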
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { DIGlobalVariable GV(N); @@ -1952,16 +2065,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } - } else if (Constant *C = GV.getConstant()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) { - if (isUnsignedDIType(GTy)) - addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - } + } else if (ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(GV.getConstant())) + addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + // GV is a merged global. + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0)))); + ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2)); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + return; } @@ -2043,25 +2162,12 @@ void DwarfDebug::beginModule(Module *M) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); - - // Print out .file directives to specify files for .loc directives. These are - // printed out early so that they precede any .loc directives. - if (Asm->MAI->hasDotLocAndDotFile()) { - for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) { - // Remember source id starts at 1. - std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i); - // FIXME: don't use sys::path for this! This should not depend on the - // host. - sys::Path FullPath(getSourceDirectoryName(Id.first)); - bool AppendOk = - FullPath.appendComponent(getSourceFileName(Id.second)); - assert(AppendOk && "Could not append filename to directory!"); - AppendOk = false; - Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str()); - } - } } /// endModule - Emit all Dwarf sections that should come after the content. @@ -2081,8 +2187,7 @@ void DwarfDebug::endModule() { StringRef FName = SP.getLinkageName(); if (FName.empty()) FName = SP.getName(); - NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName))); + NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName); if (!NMD) continue; unsigned E = NMD->getNumOperands(); if (!E) continue; @@ -2152,9 +2257,6 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit source line correspondence into a debug line section. - emitDebugLines(); - // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2242,15 +2344,6 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, } } -/// isDbgValueInUndefinedReg - Return true if debug value, encoded by -/// DBG_VALUE instruction, is in undefined reg. 
-static bool isDbgValueInUndefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg()) - return true; - return false; -} - /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { @@ -2275,7 +2368,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MInsn = II; - if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn)) + if (!MInsn->isDebugValue()) continue; DbgValues.push_back(MInsn); } @@ -2297,19 +2390,18 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, ME = DbgValues.end(); MI != ME; ++MI) { const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI) && + if (Var == DV && !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); PrevMI = *MI; } - DbgScope *Scope = findDbgScope(MInsn); - bool CurFnArg = false; + DbgScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) - CurFnArg = true; - if (!Scope && CurFnArg) Scope = CurrentFnDbgScope; + else + Scope = findDbgScope(MInsn); // If variable scope is not found then skip this variable. if (!Scope) continue; @@ -2317,8 +2409,6 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); - if (!CurFnArg) - DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; @@ -2375,10 +2465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, // Collect info for variables that were optimized out. const Function *F = MF->getFunction(); - const Module *M = F->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", - getRealLinkageName(F->getName())))) { + if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) @@ -2409,8 +2496,8 @@ const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { return I->second; } -/// beginScope - Process beginning of a scope. -void DwarfDebug::beginScope(const MachineInstr *MI) { +/// beginInstruction - Process beginning of an instruction. +void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (InsnNeedsLabel.count(MI) == 0) { LabelsBeforeInsn[MI] = PrevLabel; return; @@ -2444,8 +2531,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) { assert (0 && "Instruction is not processed!"); } -/// endScope - Process end of a scope. -void DwarfDebug::endScope(const MachineInstr *MI) { +/// endInstruction - Process end of an instruction. +void DwarfDebug::endInstruction(const MachineInstr *MI) { if (InsnsEndScopeSet.count(MI) != 0) { // Emit a label if this instruction ends a scope. MCSymbol *Label = MMI->getContext().CreateTempSymbol(); @@ -2624,6 +2711,10 @@ bool DwarfDebug::extractScopeInformation() { continue; } + // Ignore DBG_VALUE. It does not contribute any instruction in output. 
+ if (MInsn->isDebugValue()) + continue; + if (RangeBeginMI) { // If we have alread seen a beginning of a instruction range and // current instruction scope does not match scope of first instruction @@ -2727,12 +2818,37 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) { return DebugLoc(); } +#ifndef NDEBUG +/// CheckLineNumbers - Count basicblocks whose instructions do not have any +/// line number information. +static void CheckLineNumbers(const MachineFunction *MF) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + bool FoundLineNo = false; + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MI = II; + if (!MI->getDebugLoc().isUnknown()) { + FoundLineNo = true; + break; + } + } + if (!FoundLineNo && I->size()) + ++BlocksWithoutLineNo; + } +} +#endif + /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; if (!extractScopeInformation()) return; +#ifndef NDEBUG + CheckLineNumbers(MF); +#endif + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. @@ -2775,16 +2891,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable - && isDbgValueInUndefinedReg(MI) == false) + if (DV.getTag() != dwarf::DW_TAG_arg_variable) InsnNeedsLabel.insert(MI); // DBG_VALUE for inlined functions argument needs a label. else if (!DISubprogram(getDISubprogram(DV.getContext())). describes(MF->getFunction())) InsnNeedsLabel.insert(MI); // DBG_VALUE indicating argument location change needs a label. - else if (isDbgValueInUndefinedReg(MI) == false - && !ProcessedArgs.insert(DV)) + else if (!ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { // If location is unknown then instruction needs a location only if @@ -2820,17 +2934,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { SmallPtrSet<const MDNode *, 16> ProcessedVars; collectVariableInfo(MF, ProcessedVars); - // Get function line info. - if (!Lines.empty()) { - // Get section line info. - unsigned ID = SectionMap.insert(Asm->getCurrentSection()); - if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); - std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1]; - // Append the function info to section info. - SectionLineInfos.insert(SectionLineInfos.end(), - Lines.begin(), Lines.end()); - } - // Construct abstract scopes. 
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(), AE = AbstractScopesList.end(); AI != AE; ++AI) { @@ -2840,10 +2943,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { StringRef FName = SP.getLinkageName(); if (FName.empty()) FName = SP.getName(); - const Module *M = MF->getFunction()->getParent(); - if (NamedMDNode *NMD = - M->getNamedMetadata(Twine("llvm.dbg.lv.", - getRealLinkageName(FName)))) { + if (NamedMDNode *NMD = + getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast<MDNode>(NMD->getOperand(i))); if (!DV || !ProcessedVars.insert(DV)) @@ -2875,7 +2976,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); - DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); InsnsEndScopeSet.clear(); ConcreteScopes.clear(); @@ -2884,7 +2984,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); - Lines.clear(); PrevLabel = NULL; } @@ -2906,15 +3005,6 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) { return true; } -/// findVariableLabel - Find MCSymbol for the variable. -const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) { - DenseMap<const DbgVariable *, const MCSymbol *>::iterator I - = DbgVariableLabelsMap.find(V); - if (I == DbgVariableLabelsMap.end()) - return NULL; - else return I->second; -} - /// findDbgScope - Find DbgScope for the debug loc attached with an /// instruction. DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { @@ -2940,7 +3030,6 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// the source line list. MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { - StringRef Dir; StringRef Fn; unsigned Src = 1; @@ -2949,25 +3038,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, if (Scope.isCompileUnit()) { DICompileUnit CU(S); - Dir = CU.getDirectory(); Fn = CU.getFilename(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); } else if (Scope.isSubprogram()) { DISubprogram SP(S); - Dir = SP.getDirectory(); Fn = SP.getFilename(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); - Dir = DB.getDirectory(); Fn = DB.getFilename(); } else assert(0 && "Unexpected scope info"); - Src = GetOrCreateSourceID(Dir, Fn); + Src = GetOrCreateSourceID(Fn); } - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); + Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, + 0, 0); + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Asm->OutStreamer.EmitLabel(Label); return Label; } @@ -3151,6 +3241,14 @@ void DwarfDebug::emitDIE(DIE *Die) { Values[i]->EmitValue(Asm, Form); break; } + case dwarf::DW_AT_accessibility: { + if (Asm->isVerbose()) { + DIEInteger *V = cast<DIEInteger>(Values[i]); + Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue())); + } + Values[i]->EmitValue(Asm, Form); + break; + } default: // Emit an attribute using the defined form. Values[i]->EmitValue(Asm, Form); @@ -3270,185 +3368,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitDebugLines - Emit source line information. 
-/// -void DwarfDebug::emitDebugLines() { - // If the target is using .loc/.file, the assembler will be emitting the - // .debug_line table automatically. - if (Asm->MAI->hasDotLocAndDotFile()) - return; - - // Minimum line delta, thus ranging from -10..(255-10). - const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1); - // Maximum line delta, thus ranging from -10..(255-10). - const int MaxLineDelta = 255 + MinLineDelta; - - // Start the dwarf line section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfLineSection()); - - // Construct the section header. - Asm->OutStreamer.AddComment("Length of Source Line Info"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), - Asm->GetTempSymbol("line_begin"), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin")); - - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Prolog Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"), - Asm->GetTempSymbol("line_prolog_begin"), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin")); - - Asm->OutStreamer.AddComment("Minimum Instruction Length"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("Default is_stmt_start flag"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)"); - Asm->EmitInt8(MinLineDelta); - Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)"); - Asm->EmitInt8(MaxLineDelta); - Asm->OutStreamer.AddComment("Special Opcode Base"); - Asm->EmitInt8(-MinLineDelta); - - // Line number standard opcode encodings argument count - Asm->OutStreamer.AddComment("DW_LNS_copy arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_set_file arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_set_column arg count"); - Asm->EmitInt8(1); - Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count"); - Asm->EmitInt8(1); - - // Emit directories. - for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) { - const std::string &Dir = getSourceDirectoryName(DI); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory"); - Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0); - } - - Asm->OutStreamer.AddComment("End of directories"); - Asm->EmitInt8(0); - - // Emit files. - for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) { - // Remember source id starts at 1. - std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI); - const std::string &FN = getSourceFileName(Id.second); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source"); - Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0); - - Asm->EmitULEB128(Id.first, "Directory #"); - Asm->EmitULEB128(0, "Mod date"); - Asm->EmitULEB128(0, "File size"); - } - - Asm->OutStreamer.AddComment("End of files"); - Asm->EmitInt8(0); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end")); - - // A sequence for each text section. 
- unsigned SecSrcLinesSize = SectionSourceLines.size(); - - for (unsigned j = 0; j < SecSrcLinesSize; ++j) { - // Isolate current sections line info. - const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j]; - - // Dwarf assumes we start with first line of first source file. - unsigned Source = 1; - unsigned Line = 1; - - // Construct rows of the address, source, line, column matrix. - for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) { - const SrcLineInfo &LineInfo = LineInfos[i]; - MCSymbol *Label = LineInfo.getLabel(); - if (!Label->isDefined()) continue; // Not emitted, in dead code. - - if (Asm->isVerbose()) { - std::pair<unsigned, unsigned> SrcID = - getSourceDirectoryAndFileIds(LineInfo.getSourceID()); - Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) + - "/" + - Twine(getSourceFileName(SrcID.second)) + - ":" + Twine(LineInfo.getLine())); - } - - // Define the line address. - Asm->OutStreamer.AddComment("Extended Op"); - Asm->EmitInt8(0); - Asm->OutStreamer.AddComment("Op size"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1); - - Asm->OutStreamer.AddComment("DW_LNE_set_address"); - Asm->EmitInt8(dwarf::DW_LNE_set_address); - - Asm->OutStreamer.AddComment("Location label"); - Asm->OutStreamer.EmitSymbolValue(Label, - Asm->getTargetData().getPointerSize(), - 0/*AddrSpace*/); - - // If change of source, then switch to the new source. - if (Source != LineInfo.getSourceID()) { - Source = LineInfo.getSourceID(); - Asm->OutStreamer.AddComment("DW_LNS_set_file"); - Asm->EmitInt8(dwarf::DW_LNS_set_file); - Asm->EmitULEB128(Source, "New Source"); - } - - // If change of line. - if (Line != LineInfo.getLine()) { - // Determine offset. - int Offset = LineInfo.getLine() - Line; - int Delta = Offset - MinLineDelta; - - // Update line. - Line = LineInfo.getLine(); - - // If delta is small enough and in range... - if (Delta >= 0 && Delta < (MaxLineDelta - 1)) { - // ... then use fast opcode. - Asm->OutStreamer.AddComment("Line Delta"); - Asm->EmitInt8(Delta - MinLineDelta); - } else { - // ... otherwise use long hand. - Asm->OutStreamer.AddComment("DW_LNS_advance_line"); - Asm->EmitInt8(dwarf::DW_LNS_advance_line); - Asm->EmitSLEB128(Offset, "Line Offset"); - Asm->OutStreamer.AddComment("DW_LNS_copy"); - Asm->EmitInt8(dwarf::DW_LNS_copy); - } - } else { - // Copy the previous row (different address or source) - Asm->OutStreamer.AddComment("DW_LNS_copy"); - Asm->EmitInt8(dwarf::DW_LNS_copy); - } - } - - emitEndOfLineMatrix(j + 1); - } - - if (SecSrcLinesSize == 0) - // Because we're emitting a debug_line section, we still need a line - // table. The linker and friends expect it to exist. If there's nothing to - // put into it, emit an empty table. - emitEndOfLineMatrix(1); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end")); -} - /// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// void DwarfDebug::emitCommonDebugFrame() { @@ -3456,8 +3375,8 @@ void DwarfDebug::emitCommonDebugFrame() { return; int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsDown) + if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == + TargetFrameLowering::StackGrowsDown) stackGrowth *= -1; // Start the dwarf frame section. 
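The hand-written .debug_line emitter removed above has no replacement in this file; line-table generation is delegated to the MC streamer through .file and .loc directives. As a condensed restatement of the new path, using only calls that appear elsewhere in this patch (the surrounding control flow is a sketch, not code from the patch):

    // One source id per file name; the directory component is no longer tracked.
    StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
    if (!Entry.getValue()) {
      Entry.setValue(SourceIdMap.size());
      // Emits ".file <id> <name>" so that later ".loc" directives can reference it.
      Asm->OutStreamer.EmitDwarfFileDirective(Entry.getValue(), FileName);
    }
    // For each source-position change, emit ".loc <id> <line> <col>" plus a label.
    Asm->OutStreamer.EmitDwarfLocDirective(Entry.getValue(), Line, Col,
                                            DWARF2_FLAG_IS_STMT, 0, 0);
    Asm->OutStreamer.EmitLabel(MMI->getContext().CreateTempSymbol());

The assembler (or the streamer's object emission) then builds the .debug_line section from these directives, which is why emitDebugLines() and the SrcLineInfo/SectionSourceLines bookkeeping can be dropped.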
@@ -3480,10 +3399,11 @@ void DwarfDebug::emitCommonDebugFrame() { Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); Asm->OutStreamer.AddComment("CIE RA Column"); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false)); std::vector<MachineMove> Moves; - RI->getInitialFrameState(Moves); + TFI->getInitialFrameState(Moves); Asm->EmitFrameMoves(Moves, 0, false); @@ -3667,6 +3587,14 @@ void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; + for (SmallVector<DotDebugLocEntry, 4>::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + I != E; ++I) { + DotDebugLocEntry &Entry = *I; + if (I + 1 != DotDebugLocEntries.end()) + Entry.Merge(I+1); + } + // Start the dwarf loc section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); @@ -3676,7 +3604,8 @@ void DwarfDebug::emitDebugLoc() { for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I, ++index) { - DotDebugLocEntry Entry = *I; + DotDebugLocEntry &Entry = *I; + if (Entry.isMerged()) continue; if (Entry.isEmpty()) { Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index f0ff3bc71699..7df0510fbfba 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -23,6 +23,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -51,6 +52,8 @@ class DIType; class DINameSpace; class DISubrange; class DICompositeType; +class DITemplateTypeParameter; +class DITemplateValueParameter; //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. @@ -71,6 +74,28 @@ public: MCSymbol *getLabel() const { return Label; } }; +/// DotDebugLocEntry - This struct describes location entries emitted in +/// .debug_loc section. +typedef struct DotDebugLocEntry { + const MCSymbol *Begin; + const MCSymbol *End; + MachineLocation Loc; + bool Merged; + DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) + : Begin(B), End(E), Loc(L), Merged(false) {} + /// Empty entries are also used as a trigger to emit temp label. Such + /// labels are referenced is used to find debug_loc offset for a given DIE. + bool isEmpty() { return Begin == 0 && End == 0; } + bool isMerged() { return Merged; } + void Merge(DotDebugLocEntry *Next) { + if (!(Begin && Loc == Next->Loc && End == Next->Begin)) + return; + Next->Begin = Begin; + Merged = true; + } +} DotDebugLocEntry; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -93,30 +118,9 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// DirectoryIdMap - Directory name to directory id map. - /// - StringMap<unsigned> DirectoryIdMap; - - /// DirectoryNames - A list of directory names. - SmallVector<std::string, 8> DirectoryNames; - - /// SourceFileIdMap - Source file name to source file id map. - /// - StringMap<unsigned> SourceFileIdMap; - - /// SourceFileNames - A list of source file names. 
- SmallVector<std::string, 8> SourceFileNames; - /// SourceIdMap - Source id map, i.e. pair of directory id and source file /// id mapped to a unique id. - DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap; - - /// SourceIds - Reverse map from source id to directory id + file id pair. - /// - SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds; - - /// Lines - List of source line correspondence. - std::vector<SrcLineInfo> Lines; + StringMap<unsigned> SourceIdMap; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -135,10 +139,6 @@ class DwarfDebug { /// UniqueVector<const MCSection*> SectionMap; - /// SectionSourceLines - Tracks line numbers per text section. - /// - std::vector<std::vector<SrcLineInfo> > SectionSourceLines; - // CurrentFnDbgScope - Top level scope for the current function. // DbgScope *CurrentFnDbgScope; @@ -175,23 +175,6 @@ class DwarfDebug { /// machine instruction. DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap; - /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol. - DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap; - - /// DotDebugLocEntry - This struct describes location entries emitted in - /// .debug_loc section. - typedef struct DotDebugLocEntry { - const MCSymbol *Begin; - const MCSymbol *End; - MachineLocation Loc; - DotDebugLocEntry() : Begin(0), End(0) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, - MachineLocation &L) : Begin(B), End(E), Loc(L) {} - /// Empty entries are also used as a trigger to emit temp label. Such - /// labels are referenced is used to find debug_loc offset for a given DIE. - bool isEmpty() { return Begin == 0 && End == 0; } - } DotDebugLocEntry; - /// DotDebugLocEntries - Collection of DotDebugLocEntry. SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries; @@ -265,35 +248,10 @@ class DwarfDebug { DIEInteger *DIEIntegerOne; private: - - /// getSourceDirectoryAndFileIds - Return the directory and file ids that - /// maps to the source id. Source id starts at 1. - std::pair<unsigned, unsigned> - getSourceDirectoryAndFileIds(unsigned SId) const { - return SourceIds[SId-1]; - } - - /// getNumSourceDirectories - Return the number of source directories in the - /// debug info. - unsigned getNumSourceDirectories() const { - return DirectoryNames.size(); - } - - /// getSourceDirectoryName - Return the name of the directory corresponding - /// to the id. - const std::string &getSourceDirectoryName(unsigned Id) const { - return DirectoryNames[Id - 1]; - } - - /// getSourceFileName - Return the name of the source file corresponding - /// to the id. - const std::string &getSourceFileName(unsigned Id) const { - return SourceFileNames[Id - 1]; - } /// getNumSourceIds - Return the number of unique source ids. unsigned getNumSourceIds() const { - return SourceIds.size(); + return SourceIdMap.size(); } /// assignAbbrevNumber - Define a unique number for the abbreviation. @@ -349,13 +307,14 @@ private: const MachineLocation &Location); /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addRegisterAddress(DIE *Die, const MachineOperand &MO); /// addConstantValue - Add constant value entry in variable DIE. 
- bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addConstantValue(DIE *Die, const MachineOperand &MO); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable @@ -393,6 +352,14 @@ private: /// given DIType. DIE *getOrCreateTypeDIE(DIType Ty); + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. + DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + void addPubTypes(DISubprogram SP); /// constructTypeDIE - Construct basic type die from DIBasicType. @@ -421,7 +388,7 @@ private: DIE *createMemberDIE(DIDerivedType DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false); + DIE *createSubprogramDIE(DISubprogram SP); /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -481,10 +448,6 @@ private: /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitDebugLines - Emit source line information. - /// - void emitDebugLines(); - /// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// void emitCommonDebugFrame(); @@ -543,9 +506,8 @@ private: /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. This can update DirectoryNames and SourceFileNames - /// maps as well. - unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); + /// in the SourceIds map. + unsigned GetOrCreateSourceID(StringRef FullName); /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. @@ -565,12 +527,6 @@ private: /// the source line list. MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); - /// getSourceLineCount - Return the number of source lines in the debug - /// info. - unsigned getSourceLineCount() const { - return Lines.size(); - } - /// recordVariableFrameIndex - Record a variable's index. void recordVariableFrameIndex(const DbgVariable *V, int Index); @@ -578,9 +534,6 @@ private: /// is found. Update FI to hold value of the index. bool findVariableFrameIndex(const DbgVariable *V, int *FI); - /// findVariableLabel - Find MCSymbol for the variable. - const MCSymbol *findVariableLabel(const DbgVariable *V); - /// findDbgScope - Find DbgScope for the debug loc attached with an /// instruction. DbgScope *findDbgScope(const MachineInstr *MI); @@ -630,11 +583,11 @@ public: /// getLabelAfterInsn - Return Label immediately following the instruction. const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// beginScope - Process beginning of a scope. - void beginScope(const MachineInstr *MI); + /// beginInstruction - Process beginning of an instruction. 
+ void beginInstruction(const MachineInstr *MI); - /// endScope - Prcess end of a scope. - void endScope(const MachineInstr *MI); + /// endInstruction - Prcess end of an instruction. + void endInstruction(const MachineInstr *MI); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 86a368831e0e..967a2783da14 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -26,7 +26,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -39,238 +39,10 @@ using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) - : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false), - shouldEmitTableModule(false), shouldEmitMovesModule(false) {} + : Asm(A), MMI(Asm->MMI) {} DwarfException::~DwarfException() {} -/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that -/// is shared among many Frame Description Entries. There is at least one CIE -/// in every non-empty .debug_frame section. -void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { - // Size and sign of stack growth. - int stackGrowth = Asm->getTargetData().getPointerSize(); - if (Asm->TM.getFrameInfo()->getStackGrowthDirection() == - TargetFrameInfo::StackGrowsDown) - stackGrowth *= -1; - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - MCSymbol *EHFrameSym; - if (TLOF.isFunctionEHFrameSymbolPrivate()) - EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); - else - EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + - Twine(Index)); - Asm->OutStreamer.EmitLabel(EHFrameSym); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); - - // Define base labels. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); - - // Define the eh frame length. - Asm->OutStreamer.AddComment("Length of Common Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), - Asm->GetTempSymbol("eh_frame_common_begin", Index), - 4); - - // EH frame header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); - Asm->OutStreamer.AddComment("CIE Identifier Tag"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - Asm->OutStreamer.AddComment("DW_CIE_VERSION"); - Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); - - // The personality presence indicates that language specific information will - // show up in the eh frame. Find out how we are supposed to lower the - // personality function reference: - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - - char Augmentation[6] = { 0 }; - unsigned AugmentationSize = 0; - char *APtr = Augmentation + 1; - - if (PersonalityFn) { - // There is a personality function. - *APtr++ = 'P'; - AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); - } - - if (UsesLSDA[Index]) { - // An LSDA pointer is in the FDE augmentation. 
- *APtr++ = 'L'; - ++AugmentationSize; - } - - if (FDEEncoding != dwarf::DW_EH_PE_absptr) { - // A non-default pointer encoding for the FDE. - *APtr++ = 'R'; - ++AugmentationSize; - } - - if (APtr != Augmentation + 1) - Augmentation[0] = 'z'; - - Asm->OutStreamer.AddComment("CIE Augmentation"); - Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); - - // Round out reader. - Asm->EmitULEB128(1, "CIE Code Alignment Factor"); - Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); - Asm->OutStreamer.AddComment("CIE Return Address Column"); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Augmentation[0]) { - Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityFn) { - Asm->EmitEncodingByte(PerEncoding, "Personality"); - Asm->OutStreamer.AddComment("Personality"); - Asm->EmitReference(PersonalityFn, PerEncoding); - } - if (UsesLSDA[Index]) - Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); - if (FDEEncoding != dwarf::DW_EH_PE_absptr) - Asm->EmitEncodingByte(FDEEncoding, "FDE"); - } - - // Indicate locations of general callee saved registers in frame. - std::vector<MachineMove> Moves; - RI->getInitialFrameState(Moves); - Asm->EmitFrameMoves(Moves, 0, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it must - // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get - // holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); -} - -/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. -void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && - "Should not emit 'available externally' functions at all"); - - const Function *TheFunc = EHFrameInfo.function; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - - unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); - - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); - - // Externally visible entry into the functions eh frame info. If the - // corresponding function is static, this should not be externally visible. - if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); - - // If corresponding function is weak definition, this should be too. - if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_WeakDefinition); - - // If corresponding function is hidden, this should be too. - if (TheFunc->hasHiddenVisibility()) - if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - HiddenAttr); - - // If there are no calls then you can't unwind. This may mean we can omit the - // EH Frame, but some environments do not handle weak absolute symbols. If - // UnwindTablesMandatory is set we cannot do this optimization; the unwind - // info is to be available for non-EH uses. 
- if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && - (!TheFunc->isWeakForLinker() || - !Asm->MAI->getWeakDefDirective() || - TLOF.getSupportsWeakOmittedEHFrame())) { - Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, - MCConstantExpr::Create(0, Asm->OutContext)); - // This name has no connection to the function, so it might get - // dead-stripped when the function is not, erroneously. Prohibit - // dead-stripping unconditionally. - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } else { - Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); - - // EH frame header. - Asm->OutStreamer.AddComment("Length of Frame Information Entry"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", - EHFrameInfo.Number)); - - Asm->OutStreamer.AddComment("FDE CIE offset"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), - Asm->GetTempSymbol("eh_frame_common", - EHFrameInfo.PersonalityIndex), 4); - - MCSymbol *EHFuncBeginSym = - Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); - - Asm->OutStreamer.AddComment("FDE initial location"); - Asm->EmitReference(EHFuncBeginSym, FDEEncoding); - - Asm->OutStreamer.AddComment("FDE address range"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", - EHFrameInfo.Number), - EHFuncBeginSym, - Asm->GetSizeOfEncodedValue(FDEEncoding)); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (MMI->getPersonalities()[0] != NULL) { - unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); - - Asm->EmitULEB128(Size, "Augmentation size"); - Asm->OutStreamer.AddComment("Language Specific Data Area"); - if (EHFrameInfo.hasLandingPads) - Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), - LSDAEncoding); - else - Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); - - } else { - Asm->EmitULEB128(0, "Augmentation size"); - } - - // Indicate locations of function specific callee saved registers in frame. - Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); - - // On Darwin the linker honors the alignment of eh_frame, which means it - // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you - // get holes which confuse readers of eh_frame. - Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", - EHFrameInfo.Number)); - - // If the function is marked used, this table should be also. We cannot - // make the mark unconditional in this case, since retaining the table also - // retains the function in this case, and there is code around that depends - // on unused functions (calling undefined externals) being dead-stripped to - // link correctly. Yes, there really is. - if (MMI->isUsedFunction(EHFrameInfo.function)) - if (Asm->MAI->hasNoDeadStrip()) - Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, - MCSA_NoDeadStrip); - } - Asm->OutStreamer.AddBlankLine(); -} - /// SharedTypeIds - How many leading type ids two landing pads have in common. 
unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, const LandingPadInfo *R) { @@ -422,7 +194,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(I); if (!MO.isGlobal()) continue; - + const Function *F = dyn_cast<Function>(MO.getGlobal()); if (F == 0) continue; @@ -430,7 +202,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { // Be conservative. If we have more than one function operand for this // call, then we can't make the assumption that it's the callee and // not a parameter to the call. - // + // // FIXME: Determine if there's a way to say that `F' is the callee or // parameter. MarkedNoUnwind = false; @@ -497,8 +269,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -520,8 +291,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, }; // Try to merge with the previous call-site. SJLJ doesn't do this - if (PreviousIsInvoke && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. @@ -531,7 +301,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } // Otherwise, create a new call-site. - if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) + if (Asm->MAI->isExceptionHandlingDwarf()) CallSites.push_back(Site); else { // SjLj EH must maintain the call sites in the order assigned @@ -549,8 +319,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && - Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { CallSiteEntry Site = { LastLabel, 0, 0, 0 }; CallSites.push_back(Site); } @@ -620,7 +389,7 @@ void DwarfException::EmitExceptionTable() { // Call sites. bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - + unsigned CallSiteTableLength; if (IsSJLJ) CallSiteTableLength = 0; @@ -628,7 +397,7 @@ void DwarfException::EmitExceptionTable() { unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 - CallSiteTableLength = + CallSiteTableLength = CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); } @@ -656,15 +425,15 @@ void DwarfException::EmitExceptionTable() { // mode, this reference will require a relocation by the dynamic linker. // // Because of this, we have a couple of options: - // + // // 1) If we are in -static mode, we can always use an absolute reference // from the LSDA, because the static linker will resolve it. 
- // + // // 2) Otherwise, if the LSDA section is writable, we can output the direct // reference to the typeinfo and allow the dynamic linker to relocate // it. Since it is in a writable section, the dynamic linker won't // have a problem. - // + // // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, // we need to use some form of indirection. For example, on Darwin, // we can output a statically-relocatable reference to a dyld stub. The @@ -682,11 +451,14 @@ void DwarfException::EmitExceptionTable() { } // Begin the exception table. - Asm->OutStreamer.SwitchSection(LSDASection); + // Sometimes we want not to emit the data into separate section (e.g. ARM + // EHABI). In this case LSDASection will be NULL. + if (LSDASection) + Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2); // Emit the LSDA. - MCSymbol *GCCETSym = + MCSymbol *GCCETSym = Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+ Twine(Asm->getFunctionNumber())); Asm->OutStreamer.EmitLabel(GCCETSym); @@ -764,7 +536,7 @@ void DwarfException::EmitExceptionTable() { } } else { // DWARF Exception handling - assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf); + assert(Asm->MAI->isExceptionHandlingDwarf()); // The call-site table is a list of all call sites that may throw an // exception (including C++ 'throw' statements) in the procedure @@ -793,23 +565,23 @@ void DwarfException::EmitExceptionTable() { for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { const CallSiteEntry &S = *I; - + MCSymbol *EHFuncBeginSym = Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); - + MCSymbol *BeginLabel = S.BeginLabel; if (BeginLabel == 0) BeginLabel = EHFuncBeginSym; MCSymbol *EndLabel = S.EndLabel; if (EndLabel == 0) EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); - + // Offset of the call site relative to the previous call site, counted in // number of 16-byte bundles. The first call site is counted relative to // the start of the procedure fragment. Asm->OutStreamer.AddComment("Region start"); Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); - + Asm->OutStreamer.AddComment("Region length"); Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); @@ -834,7 +606,7 @@ void DwarfException::EmitExceptionTable() { Asm->OutStreamer.AddComment("-- Action Record Table --"); Asm->OutStreamer.AddBlankLine(); } - + for (SmallVectorImpl<ActionEntry>::const_iterator I = Actions.begin(), E = Actions.end(); I != E; ++I) { const ActionEntry &Action = *I; @@ -888,73 +660,17 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { - if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf) - return; - - if (!shouldEmitMovesModule && !shouldEmitTableModule) - return; - - const std::vector<const Function*> &Personalities = MMI->getPersonalities(); - - for (unsigned I = 0, E = Personalities.size(); I < E; ++I) - EmitCIE(Personalities[I], I); - - for (std::vector<FunctionEHFrameInfo>::iterator - I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) - EmitFDE(*I); + assert(0 && "Should be implemented"); } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. 
void DwarfException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; - - // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); - - // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; - - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); - - shouldEmitTableModule |= shouldEmitTable; - shouldEmitMovesModule |= shouldEmitMoves; + assert(0 && "Should be implemented"); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", - Asm->getFunctionNumber())); - - // Record if this personality index uses a landing pad. - bool HasLandingPad = !MMI->getLandingPads().empty(); - UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - - if (HasLandingPad) - EmitExceptionTable(); - - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - MCSymbol *FunctionEHSym = - Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", - TLOF.isFunctionEHFrameSymbolPrivate()); - - // Save EH frame information - EHFrames. - push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->adjustsStack(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); + assert(0 && "Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index bc311e67054e..a172e53f8ac7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -35,60 +35,13 @@ class AsmPrinter; /// DwarfException - Emits Dwarf exception handling directives. /// class DwarfException { +protected: /// Asm - Target of Dwarf emission. AsmPrinter *Asm; /// MMI - Collected machine module information. MachineModuleInfo *MMI; - struct FunctionEHFrameInfo { - MCSymbol *FunctionEHSym; // L_foo.eh - unsigned Number; - unsigned PersonalityIndex; - bool adjustsStack; - bool hasLandingPads; - std::vector<MachineMove> Moves; - const Function *function; - - FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, - bool hC, bool hL, - const std::vector<MachineMove> &M, - const Function *f): - FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } - }; - - std::vector<FunctionEHFrameInfo> EHFrames; - - /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index - /// uses an LSDA. If so, then we need to encode that information in the CIE's - /// augmentation. - DenseMap<unsigned, bool> UsesLSDA; - - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; - - /// shouldEmitFrameModule - Per-module flag to indicate if frame moves - /// should be emitted. 
- bool shouldEmitMovesModule; - - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information - /// that is shared among many Frame Description Entries. There is at least - /// one CIE in every non-empty .debug_frame section. - void EmitCIE(const Function *Personality, unsigned Index); - - /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. - void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); - /// EmitExceptionTable - Emit landing pads and actions. /// /// The general organization of the table is complex, but the basic concepts @@ -172,18 +125,116 @@ public: // Main entry points. // DwarfException(AsmPrinter *A); - ~DwarfException(); + virtual ~DwarfException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class DwarfCFIException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfCFIException(AsmPrinter *A); + virtual ~DwarfCFIException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. + virtual void EndFunction(); +}; + +class DwarfTableException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; + + /// shouldEmitMovesModule - Per-module flag to indicate if frame moves + /// should be emitted. + bool shouldEmitMovesModule; + + struct FunctionEHFrameInfo { + MCSymbol *FunctionEHSym; // L_foo.eh + unsigned Number; + unsigned PersonalityIndex; + bool adjustsStack; + bool hasLandingPads; + std::vector<MachineMove> Moves; + const Function *function; + + FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P, + bool hC, bool hL, + const std::vector<MachineMove> &M, + const Function *f): + FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), + adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } + }; + + std::vector<FunctionEHFrameInfo> EHFrames; + + /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index + /// uses an LSDA. If so, then we need to encode that information in the CIE's + /// augmentation. + DenseMap<unsigned, bool> UsesLSDA; + + /// EmitCIE - Emit a Common Information Entry (CIE). 
This holds information + /// that is shared among many Frame Description Entries. There is at least + /// one CIE in every non-empty .debug_frame section. + void EmitCIE(const Function *Personality, unsigned Index); + + /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. + void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfTableException(AsmPrinter *A); + virtual ~DwarfTableException(); /// EndModule - Emit all exception information that should come after the /// content. - void EndModule(); + virtual void EndModule(); /// BeginFunction - Gather pre-function exception information. Assumes being /// emitted immediately after the function entry point. - void BeginFunction(const MachineFunction *MF); + virtual void BeginFunction(const MachineFunction *MF); /// EndFunction - Gather and emit post-function exception information. - void EndFunction(); + virtual void EndFunction(); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp new file mode 100644 index 000000000000..751901183cd0 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp @@ -0,0 +1,349 @@ +//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// The implementation emits all the necessary tables "by hands". +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +DwarfTableException::DwarfTableException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), + shouldEmitTableModule(false), shouldEmitMovesModule(false) {} + +DwarfTableException::~DwarfTableException() {} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) { + // Size and sign of stack growth. 
+ int stackGrowth = Asm->getTargetData().getPointerSize(); + if (Asm->TM.getFrameLowering()->getStackGrowthDirection() == + TargetFrameLowering::StackGrowsDown) + stackGrowth *= -1; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Begin eh frame section. + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + MCSymbol *EHFrameSym; + if (TLOF.isFunctionEHFrameSymbolPrivate()) + EHFrameSym = Asm->GetTempSymbol("EH_frame", Index); + else + EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") + + Twine(Index)); + Asm->OutStreamer.EmitLabel(EHFrameSym); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index)); + + // Define base labels. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index)); + + // Define the eh frame length. + Asm->OutStreamer.AddComment("Length of Common Information Entry"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index), + Asm->GetTempSymbol("eh_frame_common_begin", Index), + 4); + + // EH frame header. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index)); + Asm->OutStreamer.AddComment("CIE Identifier Tag"); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.AddComment("DW_CIE_VERSION"); + Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/); + + // The personality presence indicates that language specific information will + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + char Augmentation[6] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityFn) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->OutStreamer.AddComment("CIE Augmentation"); + Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0); + + // Round out reader. + Asm->EmitULEB128(1, "CIE Code Alignment Factor"); + Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor"); + Asm->OutStreamer.AddComment("CIE Return Address Column"); + + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); + + if (Augmentation[0]) { + Asm->EmitULEB128(AugmentationSize, "Augmentation Size"); + + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + Asm->EmitEncodingByte(PerEncoding, "Personality"); + Asm->OutStreamer.AddComment("Personality"); + Asm->EmitReference(PersonalityFn, PerEncoding); + } + if (UsesLSDA[Index]) + Asm->EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + Asm->EmitEncodingByte(FDEEncoding, "FDE"); + } + + // Indicate locations of general callee saved registers in frame. 
+ std::vector<MachineMove> Moves; + TFI->getInitialFrameState(Moves); + Asm->EmitFrameMoves(Moves, 0, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it must + // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get + // holes which confuse readers of eh_frame. + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index)); +} + +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && + "Should not emit 'available externally' functions at all"); + + const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + + // Externally visible entry into the functions eh frame info. If the + // corresponding function is static, this should not be externally visible. + if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global); + + // If corresponding function is weak definition, this should be too. + if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_WeakDefinition); + + // If corresponding function is hidden, this should be too. + if (TheFunc->hasHiddenVisibility()) + if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + HiddenAttr); + + // If there are no calls then you can't unwind. This may mean we can omit the + // EH Frame, but some environments do not handle weak absolute symbols. If + // UnwindTablesMandatory is set we cannot do this optimization; the unwind + // info is to be available for non-EH uses. + if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && + (!TheFunc->isWeakForLinker() || + !Asm->MAI->getWeakDefDirective() || + TLOF.getSupportsWeakOmittedEHFrame())) { + Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym, + MCConstantExpr::Create(0, Asm->OutContext)); + // This name has no connection to the function, so it might get + // dead-stripped when the function is not, erroneously. Prohibit + // dead-stripping unconditionally. + if (Asm->MAI->hasNoDeadStrip()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_NoDeadStrip); + } else { + Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym); + + // EH frame header. 
+ Asm->OutStreamer.AddComment("Length of Frame Information Entry"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number), + Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin", + EHFrameInfo.Number)); + + Asm->OutStreamer.AddComment("FDE CIE offset"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), + Asm->GetTempSymbol("eh_frame_common", + EHFrameInfo.PersonalityIndex), 4); + + MCSymbol *EHFuncBeginSym = + Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number); + + Asm->OutStreamer.AddComment("FDE initial location"); + Asm->EmitReference(EHFuncBeginSym, FDEEncoding); + + Asm->OutStreamer.AddComment("FDE address range"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end", + EHFrameInfo.Number), + EHFuncBeginSym, + Asm->GetSizeOfEncodedValue(FDEEncoding)); + + // If there is a personality and landing pads then point to the language + // specific data area in the exception table. + if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding); + + Asm->EmitULEB128(Size, "Augmentation size"); + Asm->OutStreamer.AddComment("Language Specific Data Area"); + if (EHFrameInfo.hasLandingPads) + Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number), + LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); + + } else { + Asm->EmitULEB128(0, "Augmentation size"); + } + + // Indicate locations of function specific callee saved registers in frame. + Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true); + + // On Darwin the linker honors the alignment of eh_frame, which means it + // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you + // get holes which confuse readers of eh_frame. + Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end", + EHFrameInfo.Number)); + + // If the function is marked used, this table should be also. We cannot + // make the mark unconditional in this case, since retaining the table also + // retains the function in this case, and there is code around that depends + // on unused functions (calling undefined externals) being dead-stripped to + // link correctly. Yes, there really is. + if (MMI->isUsedFunction(EHFrameInfo.function)) + if (Asm->MAI->hasNoDeadStrip()) + Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym, + MCSA_NoDeadStrip); + } + Asm->OutStreamer.AddBlankLine(); +} + +/// EndModule - Emit all exception information that should come after the +/// content. +void DwarfTableException::EndModule() { + if (!Asm->MAI->isExceptionHandlingDwarf()) + return; + + if (!shouldEmitMovesModule && !shouldEmitTableModule) + return; + + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + + for (unsigned I = 0, E = Personalities.size(); I < E; ++I) + EmitCIE(Personalities[I], I); + + for (std::vector<FunctionEHFrameInfo>::iterator + I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) + EmitFDE(*I); +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void DwarfTableException::BeginFunction(const MachineFunction *MF) { + shouldEmitTable = shouldEmitMoves = false; + + // If any landing pads survive, we need an EH table. 
+ shouldEmitTable = !MMI->getLandingPads().empty(); + + // See if we need frame move info. + shouldEmitMoves = + !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; + + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + shouldEmitTableModule |= shouldEmitTable; + shouldEmitMovesModule |= shouldEmitMoves; +} + +/// EndFunction - Gather and emit post-function exception information. +/// +void DwarfTableException::EndFunction() { + if (!shouldEmitMoves && !shouldEmitTable) return; + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Record if this personality index uses a landing pad. + bool HasLandingPad = !MMI->getLandingPads().empty(); + UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad; + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + if (HasLandingPad) + EmitExceptionTable(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + MCSymbol *FunctionEHSym = + Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh", + TLOF.isFunctionEHFrameSymbolPrivate()); + + // Save EH frame information + EHFrames. + push_back(FunctionEHFrameInfo(FunctionEHSym, + Asm->getFunctionNumber(), + MMI->getPersonalityIndex(), + Asm->MF->getFrameInfo()->adjustsStack(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + Asm->MF->getFunction())); +} diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index c8a63cf2393b..115381767751 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include <cctype> using namespace llvm; namespace { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 2ef115dbd205..d7d0e1b3812b 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,15 +1,19 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp + AllocationOrder.cpp Analysis.cpp BranchFolding.cpp CalcSpillWeights.cpp CallingConvLower.cpp + CodeGen.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp + EdgeBundles.cpp ELFCodeEmitter.cpp ELFWriter.cpp + ExpandISelPseudos.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -18,10 +22,13 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp + LiveDebugVariables.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp + LiveIntervalUnion.cpp LiveStackAnalysis.cpp LiveVariables.cpp + LiveRangeEdit.cpp LocalStackSlotAllocation.cpp LowerSubregs.cpp MachineBasicBlock.cpp @@ -34,6 +41,7 @@ add_llvm_library(LLVMCodeGen MachineInstr.cpp MachineLICM.cpp MachineLoopInfo.cpp + MachineLoopRanges.cpp MachineModuleInfo.cpp MachineModuleInfoImpls.cpp MachinePassRegistry.cpp @@ -45,15 +53,17 @@ add_llvm_library(LLVMCodeGen OcamlGC.cpp OptimizePHIs.cpp PHIElimination.cpp + PHIEliminationUtils.cpp Passes.cpp PeepholeOptimizer.cpp - PostRAHazardRecognizer.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + RegAllocBasic.cpp RegAllocFast.cpp + RegAllocGreedy.cpp RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterCoalescer.cpp @@ -63,12 +73,14 @@ add_llvm_library(LLVMCodeGen 
ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp + ScoreboardHazardRecognizer.cpp ShadowStackGC.cpp ShrinkWrapping.cpp SimpleRegisterCoalescing.cpp SjLjEHPrepare.cpp SlotIndexes.cpp Spiller.cpp + SpillPlacement.cpp SplitKit.cpp Splitter.cpp StackProtector.cpp @@ -83,4 +95,5 @@ add_llvm_library(LLVMCodeGen VirtRegRewriter.cpp ) -target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts) +add_subdirectory(SelectionDAG) +add_subdirectory(AsmPrinter) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 1b7e08a8b6bb..76bb3d148b0b 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -25,8 +25,12 @@ using namespace llvm; char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false); +INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", + "Calculate spill weights", false, false) void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { au.addRequired<LiveIntervals>(); @@ -170,8 +174,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { totalWeight *= 0.5F; } - li.weight = totalWeight; - lis_.normalizeSpillWeight(li); + li.weight = normalizeSpillWeight(totalWeight, li.getSize()); } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { @@ -218,7 +221,7 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { if (rc == orc) return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to " - << rc->getName() <<".\n"); + DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) + << " to " << rc->getName() <<".\n"); mri.setRegClass(reg, rc); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 62ad8171a9d4..2ad80b4d3a75 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -34,8 +34,8 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute. -void CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags) { unsigned Align = ArgFlags.getByValAlign(); @@ -51,11 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, EVT ValVT, /// MarkAllocated - Mark a register and all of its aliases as allocated. 
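The CalculateSpillWeights hunk above shows the registration pattern this commit applies across CodeGen: the single INITIALIZE_PASS macro becomes a BEGIN/DEPENDENCY/END triple so required analyses are declared up front, and pass constructors call their initialize hook so registration runs even when a pass object is created directly. Schematically, for a hypothetical FooPass (not a pass in this commit), the shape is:

    // Registration with explicit dependencies (replaces the old one-shot macro).
    INITIALIZE_PASS_BEGIN(FooPass, "foo-pass", "Foo Pass", false, false)
    INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    INITIALIZE_PASS_END(FooPass, "foo-pass", "Foo Pass", false, false)

    // The constructor makes sure the registration above has actually run before
    // the pass is used, even when it is instantiated with 'new' rather than
    // through the pass manager.
    FooPass::FooPass() : MachineFunctionPass(ID) {
      initializeFooPassPass(*PassRegistry::getPassRegistry());
    }

The explicit DEPENDENCY lines declare the analyses a pass relies on, which is why so many constructors in this commit gain an initialize*Pass call of this form.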
void CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); + for (const unsigned *Alias = TRI.getOverlaps(Reg); + unsigned Reg = *Alias; ++Alias) + UsedRegs[Reg/32] |= 1 << (Reg&31); } /// AnalyzeFormalArguments - Analyze an array of argument values, @@ -66,12 +64,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, unsigned NumArgs = Ins.size(); for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; + MVT ArgVT = Ins[i].VT; ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -84,7 +82,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; + MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) return false; @@ -98,12 +96,12 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { // Determine which register each value should be copied into. for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].VT; + MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } @@ -116,12 +114,12 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, CCAssignFn Fn) { unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].VT; + MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -130,17 +128,17 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. 
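The MarkAllocated rewrite above leans on getOverlaps returning a zero-terminated list that, unlike getAliasSet, already contains the register itself, so the separate handling of Reg before the alias loop goes away. The loop idiom works with any zero-terminated array; a self-contained toy version with made-up register numbers:

    #include <cstdio>

    int main() {
      // Pretend overlap list for some register: the register itself first, then
      // its aliasing registers, terminated by 0 (register 0 is never valid).
      static const unsigned Overlaps[] = { 17, 3, 42, 0 };

      // Same loop shape as MarkAllocated above: the declaration in the loop
      // condition binds each entry to Reg and stops the walk at the final 0.
      for (const unsigned *Alias = Overlaps; unsigned Reg = *Alias; ++Alias)
        std::printf("marking register %u\n", Reg);
      return 0;
    }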
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &Flags, CCAssignFn Fn) { unsigned NumOps = ArgVTs.size(); for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; + MVT ArgVT = ArgVTs[i]; ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); + << EVT(ArgVT).getEVTString(); #endif llvm_unreachable(0); } @@ -152,12 +150,12 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, CCAssignFn Fn) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; + MVT VT = Ins[i].VT; ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } @@ -166,11 +164,11 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. -void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { #ifndef NDEBUG dbgs() << "Call result has unhandled type " - << VT.getEVTString(); + << EVT(VT).getEVTString(); #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp new file mode 100644 index 000000000000..515e6f9fde87 --- /dev/null +++ b/lib/CodeGen/CodeGen.cpp @@ -0,0 +1,61 @@ +//===-- CodeGen.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the +// CodeGen library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
+void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeCalculateSpillWeightsPass(Registry); + initializeDeadMachineInstructionElimPass(Registry); + initializeGCModuleInfoPass(Registry); + initializeIfConverterPass(Registry); + initializeLiveDebugVariablesPass(Registry); + initializeLiveIntervalsPass(Registry); + initializeLiveStacksPass(Registry); + initializeLiveVariablesPass(Registry); + initializeMachineCSEPass(Registry); + initializeMachineDominatorTreePass(Registry); + initializeMachineLICMPass(Registry); + initializeMachineLoopInfoPass(Registry); + initializeMachineModuleInfoPass(Registry); + initializeMachineSinkingPass(Registry); + initializeMachineVerifierPassPass(Registry); + initializeOptimizePHIsPass(Registry); + initializePHIEliminationPass(Registry); + initializePeepholeOptimizerPass(Registry); + initializePreAllocSplittingPass(Registry); + initializeProcessImplicitDefsPass(Registry); + initializePEIPass(Registry); + initializeRALinScanPass(Registry); + initializeRegisterCoalescerAnalysisGroup(Registry); + initializeRenderMachineFunctionPass(Registry); + initializeSimpleRegisterCoalescingPass(Registry); + initializeSlotIndexesPass(Registry); + initializeLoopSplitterPass(Registry); + initializeStackProtectorPass(Registry); + initializeStackSlotColoringPass(Registry); + initializeStrongPHIEliminationPass(Registry); + initializeTwoAddressInstructionPassPass(Registry); + initializeUnreachableBlockElimPass(Registry); + initializeUnreachableMachineBlockElimPass(Registry); + initializeVirtRegMapPass(Registry); + initializeLowerIntrinsicsPass(Registry); +} + +void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { + initializeCodeGen(*unwrap(R)); +} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 335d2d8e9bac..f79598de1d9e 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -130,21 +130,25 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); - // Any register which was defined within the previous scheduling region - // may have been rescheduled and its lifetime may overlap with registers - // in ways not reflected in our current liveness state. For each such - // register, adjust the liveness state to be conservatively correct. - for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) - if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { - assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); - - // Mark this register to be non-renamable. + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] != ~0u) { + // If Reg is currently live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = Count; + } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); // Move the def index to the end of the previous region, to reflect // that the def could theoretically have been scheduled at the end. 
DefIndices[Reg] = InsertPosIndex; } + } PrescanInstruction(MI); ScanInstruction(MI, Count); @@ -177,7 +181,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // that have special allocation requirements. Also assume all registers // used in a call must not be changed (ABI). // FIXME: The issue with predicated instruction is more complex. We are being - // conservatively here because the kill markers cannot be trusted after + // conservative here because the kill markers cannot be trusted after // if-conversion: // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] // ... @@ -321,8 +325,62 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, } } +// Check all machine operands that reference the antidependent register and must +// be replaced by NewReg. Return true if any of their parent instructions may +// clobber the new register. +// +// Note: AntiDepReg may be referenced by a two-address instruction such that +// it's use operand is tied to a def operand. We guard against the case in which +// the two-address instruction also defines NewReg, as may happen with +// pre/postincrement loads. In this case, both the use and def operands are in +// RegRefs because the def is inserted by PrescanInstruction and not erased +// during ScanInstruction. So checking for an instructions with definitions of +// both NewReg and AntiDepReg covers it. +bool +CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg) +{ + for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) { + MachineOperand *RefOper = I->second; + + // Don't allow the instruction defining AntiDepReg to earlyclobber its + // operands, in case they may be assigned to NewReg. In this case antidep + // breaking must fail, but it's too rare to bother optimizing. + if (RefOper->isDef() && RefOper->isEarlyClobber()) + return true; + + // Handle cases in which this instructions defines NewReg. + MachineInstr *MI = RefOper->getParent(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &CheckOper = MI->getOperand(i); + + if (!CheckOper.isReg() || !CheckOper.isDef() || + CheckOper.getReg() != NewReg) + continue; + + // Don't allow the instruction to define NewReg and AntiDepReg. + // When AntiDepReg is renamed it will be an illegal op. + if (RefOper->isDef()) + return true; + + // Don't allow an instruction using AntiDepReg to be earlyclobbered by + // NewReg + if (CheckOper.isEarlyClobber()) + return true; + + // Don't allow inline asm to define NewReg at all. Who know what it's + // doing with it. + if (MI->isInlineAsm()) + return true; + } + } + return false; +} + unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, +CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC) @@ -338,10 +396,10 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI, // an anti-dependence with this AntiDepReg, because that would // re-introduce that anti-dependence. if (NewReg == LastNewReg) continue; - // If the instruction already has a def of the NewReg, it's not suitable. - // For example, Instruction with multiple definitions can result in this - // condition. - if (MI->modifiesRegister(NewReg, TRI)) continue; + // If any instructions that define AntiDepReg also define the NewReg, it's + // not suitable. 
For example, Instruction with multiple definitions can + // result in this condition. + if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue; // If NewReg is dead and NewReg's most recent def is not before // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) @@ -548,7 +606,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // TODO: Instead of picking the first free register, consider which might // be the best. if (AntiDepReg != 0) { - if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg, + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, + AntiDepReg, LastNewReg[AntiDepReg], RC)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " @@ -558,9 +620,6 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Update the references to the old register to refer to the new // register. - std::pair<std::multimap<unsigned, MachineOperand *>::iterator, - std::multimap<unsigned, MachineOperand *>::iterator> - Range = RegRefs.equal_range(AntiDepReg); for (std::multimap<unsigned, MachineOperand *>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); @@ -580,7 +639,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, } // We just went back in time and modified history; the - // liveness information for the anti-depenence reg is now + // liveness information for the anti-dependence reg is now // inconsistent. Set the state as if it were dead. Classes[NewReg] = Classes[AntiDepReg]; DefIndices[NewReg] = DefIndices[AntiDepReg]; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0ed7c35b0f0c..0daaef273448 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -48,8 +48,10 @@ class TargetRegisterInfo; /// pointer. std::vector<const TargetRegisterClass*> Classes; - /// RegRegs - Map registers to all their references within a live range. + /// RegRefs - Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; + typedef std::multimap<unsigned, MachineOperand *>::const_iterator + RegRefIter; /// KillIndices - The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. 
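The RegRefs/RegRefIter plumbing above lets findSuitableFreeRegister look at every recorded reference of the anti-dependent register before committing to a rename. Stripped of the MachineInstr/MachineOperand types, the safety test reduces to roughly the following sketch, where the struct fields are simplified stand-ins for the operand queries made above:

    #include <vector>

    // One recorded reference of the anti-dependent register, reduced to the few
    // facts the check needs. Simplified stand-in for a MachineOperand reference.
    struct RegRef {
      bool RefIsDef;               // this reference defines AntiDepReg
      bool RefIsEarlyClobber;      // ...and is marked earlyclobber
      bool InstrDefinesNewReg;     // the same instruction also defines NewReg
      bool NewRegDefIsEarlyClobber;
      bool InstrIsInlineAsm;
    };

    // Conservative check in the spirit of isNewRegClobberedByRefs above: return
    // true if replacing AntiDepReg by NewReg in these references is unsafe.
    static bool renameIsUnsafe(const std::vector<RegRef> &Refs) {
      for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
        const RegRef &R = Refs[i];
        if (R.RefIsDef && R.RefIsEarlyClobber)
          return true;                 // earlyclobber def of the old register
        if (!R.InstrDefinesNewReg)
          continue;
        if (R.RefIsDef)                // instruction would define NewReg twice
          return true;
        if (R.NewRegDefIsEarlyClobber) // NewReg def clobbers the renamed use
          return true;
        if (R.InstrIsInlineAsm)        // inline asm: assume the worst
          return true;
      }
      return false;
    }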
@@ -90,10 +92,14 @@ class TargetRegisterInfo; private: void PrescanInstruction(MachineInstr *MI); void ScanInstruction(MachineInstr *MI, unsigned Count); - unsigned findSuitableFreeRegister(MachineInstr *MI, + bool isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg); + unsigned findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *); + const TargetRegisterClass *RC); }; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 318d922adebf..fdc1d9142140 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -36,7 +36,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - DeadMachineInstructionElim() : MachineFunctionPass(ID) {} + DeadMachineInstructionElim() : MachineFunctionPass(ID) { + initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); + } private: bool isDead(const MachineInstr *MI) const; @@ -45,13 +47,19 @@ namespace { char DeadMachineInstructionElim::ID = 0; INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", - "Remove dead machine instructions", false, false); + "Remove dead machine instructions", false, false) FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI->isInlineAsm()) + return false; + // Don't delete instructions with side effects. bool SawStore = false; if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) @@ -151,7 +159,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after @@ -168,7 +176,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isUse()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { LivePhysRegs.set(Reg); for (const unsigned *AliasSet = TRI->getAliasSet(Reg); *AliasSet; ++AliasSet) diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 550fd3e25fb7..0ebb5b0db70e 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -43,7 +43,7 @@ namespace { // The eh.selector intrinsic. Function *SelectorIntrinsic; - // _Unwind_Resume_or_Rethrow call. + // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. Constant *URoR; // The EH language-specific catch-all type. @@ -82,11 +82,11 @@ namespace { /// FindAllURoRInvokes - Find all URoR invokes in the function. void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes); - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" - /// calls. The "unwind" part of these invokes jump to a landing pad within - /// the current function. 
This is a candidate to merge the selector - /// associated with the URoR invoke with the one from the URoR's landing - /// pad. + /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or + /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to + /// a landing pad within the current function. This is a candidate to merge + /// the selector associated with the URoR invoke with the one from the + /// URoR's landing pad. bool HandleURoRInvokes(); /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated @@ -100,7 +100,9 @@ namespace { DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) {} + URoR(0), EHCatchAllValue(0), RewindFunction(0) { + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); + } virtual bool runOnFunction(Function &Fn); @@ -224,10 +226,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, return Changed; } -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The -/// "unwind" part of these invokes jump to a landing pad within the current -/// function. This is a candidate to merge the selector associated with the URoR -/// invoke with the one from the URoR's landing pad. +/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or +/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a +/// landing pad within the current function. This is a candidate to merge the +/// selector associated with the URoR invoke with the one from the URoR's +/// landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = @@ -247,7 +250,10 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); + if (!URoR) { + URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume"); + if (!URoR) return CleanupSelectors(CatchAllSels); + } } SmallPtrSet<InvokeInst*, 32> URoRInvokes; diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index fb884c9e8b71..e08feeb27539 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -23,7 +23,7 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Support/ELF.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class GlobalValue; diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index d14728d8a36c..0fd1e8e83bd7 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -45,6 +45,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetLowering.h" @@ -64,7 +65,7 @@ char ELFWriter::ID = 0; ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(ID), O(o), TM(tm), - OutContext(*new MCContext(*TM.getMCAsmInfo())), + OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))), TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), @@ -327,6 +328,18 @@ void ELFWriter::AddToSymbolList(ELFSym *GblSym) { } } +/// HasCommonSymbols - True if this section holds common symbols, this is +/// 
indicated on the ELF object file by a symbol with SHN_COMMON section +/// header index. +static bool HasCommonSymbols(const MCSectionELF &S) { + // FIXME: this is wrong, a common symbol can be in .data for example. + if (StringRef(S.getSectionName()).startswith(".gnu.linkonce.")) + return true; + + return false; +} + + // EmitGlobal - Choose the right section for global and emit it void ELFWriter::EmitGlobal(const GlobalValue *GV) { @@ -363,7 +376,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) { unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); GblSym->Size = Size; - if (S->HasCommonSymbols()) { // Symbol must go to a common section + if (HasCommonSymbols(*S)) { // Symbol must go to a common section GblSym->SectionIdx = ELF::SHN_COMMON; // A new linkonce section is created for each global in the diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp new file mode 100644 index 000000000000..aed8bc947991 --- /dev/null +++ b/lib/CodeGen/EdgeBundles.cpp @@ -0,0 +1,86 @@ +//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the EdgeBundles analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt<bool> +ViewEdgeBundles("view-edge-bundles", cl::Hidden, + cl::desc("Pop up a window to show edge bundle graphs")); + +char EdgeBundles::ID = 0; + +INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", + /* cfg = */true, /* analysis = */ true) + +char &llvm::EdgeBundlesID = EdgeBundles::ID; + +void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + EC.clear(); + EC.grow(2 * MF->size()); + + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + const MachineBasicBlock &MBB = *I; + unsigned OutE = 2 * MBB.getNumber() + 1; + // Join the outgoing bundle with the ingoing bundles of all successors. + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + EC.join(OutE, 2 * (*SI)->getNumber()); + } + EC.compress(); + if (ViewEdgeBundles) + view(); + return false; +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} + +/// Specialize WriteGraph, the standard implementation won't work. 
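With the numbering used by EdgeBundles above, each basic block BB#n owns two equivalence-class leaders, 2n for the bundle of its incoming edges and 2n+1 for the bundle of its outgoing edges, and the pass simply unions every outgoing bundle with the incoming bundles of all successors. A small worked example on a hypothetical diamond CFG, not taken from this commit:

    // CFG:  BB#0 -> BB#1, BB#0 -> BB#2, BB#1 -> BB#3, BB#2 -> BB#3
    //
    // Edge-end ids:  in(BB#n) = 2n, out(BB#n) = 2n+1
    //   BB#0: in 0, out 1     BB#1: in 2, out 3
    //   BB#2: in 4, out 5     BB#3: in 6, out 7
    //
    // Joins performed:  1~2, 1~4   (out of BB#0 with in of BB#1 and BB#2)
    //                   3~6, 5~6   (out of BB#1 and BB#2 with in of BB#3)
    //
    // Bundles after EC.compress():
    //   {1, 2, 4}  - the edges leaving BB#0
    //   {3, 5, 6}  - the edges entering BB#3
    //   {0} and {7} remain singletons (the function entry and exit ends).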
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const std::string &Title) { + const MachineFunction *MF = G.getMachineFunction(); + + O << "digraph {\n"; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + unsigned BB = I->getNumber(); + O << "\t\"BB#" << BB << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" + << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); SI != SE; ++SI) + O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() + << "\" [ color=lightgray ]\n"; + } + O << "}\n"; + return O; +} + + diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp new file mode 100644 index 000000000000..b5ec303f5d93 --- /dev/null +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -0,0 +1,82 @@ +//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand Psuedo-instructions produced by ISel. These are usually to allow +// the expansion to contain control flow, such as a conditional move +// implemented with a conditional branch and a phi, or an atomic operation +// implemented with a loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "expand-isel-pseudos" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +namespace { + class ExpandISelPseudos : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandISelPseudos() : MachineFunctionPass(ID) {} + + private: + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Expand ISel Pseudo-instructions"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char ExpandISelPseudos::ID = 0; +INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", + "Expand CodeGen Pseudo-instructions", false, false) + +FunctionPass *llvm::createExpandISelPseudosPass() { + return new ExpandISelPseudos(); +} + +bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + + // Iterate through each instruction in the function, looking for pseudos. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE; ) { + MachineInstr *MI = MBBI++; + + // If MI is a pseudo, expand it. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.usesCustomInsertionHook()) { + Changed = true; + MachineBasicBlock *NewMBB = + TLI->EmitInstrWithCustomInserter(MI, MBB); + // The expansion may involve new basic blocks. 
+ if (NewMBB != MBB) { + MBB = NewMBB; + I = NewMBB; + MBBI = NewMBB->begin(); + MBBE = NewMBB->end(); + } + } + } + } + + return Changed; +} diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 0f6e882a7be4..d757cf409d50 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -30,7 +30,6 @@ namespace { raw_ostream &OS; public: - Printer() : FunctionPass(ID), OS(errs()) {} explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} @@ -56,7 +55,7 @@ namespace { } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", - "Create Garbage Collector Module Metadata", false, false); + "Create Garbage Collector Module Metadata", false, false) // ----------------------------------------------------------------------------- @@ -70,7 +69,9 @@ GCFunctionInfo::~GCFunctionInfo() {} char GCModuleInfo::ID = 0; GCModuleInfo::GCModuleInfo() - : ImmutablePass(ID) {} + : ImmutablePass(ID) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +} GCModuleInfo::~GCModuleInfo() { clear(); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 719fa194d8da..766c6ee542a9 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -19,11 +19,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -123,6 +124,11 @@ GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { // ----------------------------------------------------------------------------- +INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", + false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) + FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } @@ -130,7 +136,9 @@ FunctionPass *llvm::createGCLoweringPass() { char LowerIntrinsics::ID = 0; LowerIntrinsics::LowerIntrinsics() - : FunctionPass(ID) {} + : FunctionPass(ID) { + initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); + } const char *LowerIntrinsics::getPassName() const { return "Lower Garbage Collection Instructions"; @@ -139,6 +147,7 @@ const char *LowerIntrinsics::getPassName() const { void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { FunctionPass::getAnalysisUsage(AU); AU.addRequired<GCModuleInfo>(); + AU.addPreserved<DominatorTree>(); } /// doInitialization - If this module uses the GC intrinsics, find them now. @@ -249,9 +258,16 @@ bool LowerIntrinsics::runOnFunction(Function &F) { if (NeedsDefaultLoweringPass(S)) MadeChange |= PerformDefaultLowering(F, S); - if (NeedsCustomLoweringPass(S)) + bool UseCustomLoweringPass = NeedsCustomLoweringPass(S); + if (UseCustomLoweringPass) MadeChange |= S.performCustomLowering(F); - + + // Custom lowering may modify the CFG, so dominators must be recomputed. 
+ if (UseCustomLoweringPass) { + if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) + DT->DT->recalculate(F); + } + return MadeChange; } @@ -345,13 +361,15 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { MachineBasicBlock::iterator RAI = CI; ++RAI; - if (FI->getStrategy().needsSafePoint(GC::PreCall)) - FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, - CI->getDebugLoc())); + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } - if (FI->getStrategy().needsSafePoint(GC::PostCall)) - FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, - CI->getDebugLoc())); + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } } void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { @@ -364,12 +382,12 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { } void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - assert(TRI && "TargetRegisterInfo not available!"); + const TargetFrameLowering *TFI = TM->getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(), RE = FI->roots_end(); RI != RE; ++RI) - RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num); + RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); } bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 0ea30d7a7929..db53b0473a9a 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,7 +17,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +28,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -91,6 +94,8 @@ namespace { /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) /// NonPredSize - Number of non-predicated instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. /// TrueBB / FalseBB- See AnalyzeBranch(). /// BrCond - Conditions for end of block conditional branches. 
@@ -106,6 +111,8 @@ namespace { bool CannotBeCopied : 1; bool ClobbersPred : 1; unsigned NonPredSize; + unsigned ExtraCost; + unsigned ExtraCost2; MachineBasicBlock *BB; MachineBasicBlock *TrueBB; MachineBasicBlock *FalseBB; @@ -115,7 +122,7 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -150,20 +157,31 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + const MachineLoopInfo *MLI; bool MadeChange; int FnNum; public: static char ID; - IfConverter() : MachineFunctionPass(ID), FnNum(-1) {} + IfConverter() : MachineFunctionPass(ID), FnNum(-1) { + initializeIfConverterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "If Converter"; } private: bool ReverseBranchCondition(BBInfo &BBI); - bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const; + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const; + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -188,14 +206,21 @@ namespace { bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { - return Size > 0 && TII->isProfitableToIfCvt(BB, Size); + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, + float Prediction, float Confidence) const { + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction, Confidence); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, - MachineBasicBlock &FBB, unsigned FSize) const { - return TSize > 0 && FSize > 0 && - TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, + float Prediction, float Confidence) const { + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, + Prediction, Confidence); } // blockAlwaysFallThrough - Block ends without a terminator. 
@@ -230,7 +255,9 @@ namespace { char IfConverter::ID = 0; } -INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false); +INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } @@ -238,6 +265,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; // Tail merge tend to expose more if-conversion opportunities. @@ -431,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// predecessor) forms a valid simple shape for ifcvt. It also returns the /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. -bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -441,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction, Confidence)) return false; Dups = TrueBBI.NonPredSize; } @@ -456,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, - bool FalseBranch, unsigned &Dups) const { + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -478,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, + Prediction, Confidence)) return false; Dups = Size; } @@ -493,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, return TExit && TExit == FalseBBI.BB; } -static -MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB, - const TargetInstrInfo *TII) { - MachineBasicBlock::iterator I = BB->end(); - while (I != BB->begin()) { - --I; - if (!I->getDesc().isBranch()) - break; - } - return I; -} - /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid diamond shape for ifcvt. bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -533,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) return false; - MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); + // Count duplicate instructions at the beginning of the true and false blocks. 
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); - // Skip dbg_value instructions - while (TI != TIE && TI->isDebugValue()) - ++TI; - while (FI != FIE && FI->isDebugValue()) - ++FI; - while (TI != TIE && FI != FIE) { + while (TIB != TIE && FIB != FIE) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIE && TI->isDebugValue()) - ++TI; - if (TI == TIE) + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) break; } - if (FI->isDebugValue()) { - while (FI != FIE && FI->isDebugValue()) - ++FI; - if (FI == FIE) + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIB->isIdenticalTo(FIB)) break; ++Dups1; - ++TI; - ++FI; + ++TIB; + ++FIB; } - TI = firstNonBranchInst(TrueBBI.BB, TII); - FI = firstNonBranchInst(FalseBBI.BB, TII); - MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); - MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); - // Skip dbg_value instructions at end of the bb's. - while (TI != TIB && TI->isDebugValue()) - --TI; - while (FI != FIB && FI->isDebugValue()) - --FI; - while (TI != TIB && FI != FIB) { + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. + while (TIE != TIB) { + --TIE; + if (!TIE->getDesc().isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->getDesc().isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { // Skip dbg_value instructions. These do not count. - if (TI->isDebugValue()) { - while (TI != TIB && TI->isDebugValue()) - --TI; - if (TI == TIB) + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) break; } - if (FI->isDebugValue()) { - while (FI != FIB && FI->isDebugValue()) - --FI; - if (FI == FIB) + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) break; } - if (!TI->isIdenticalTo(FI)) + if (!TIE->isIdenticalTo(FIE)) break; ++Dups2; - --TI; - --FI; + --TIE; + --FIE; } return true; @@ -627,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // Then scan all the instructions. BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { @@ -641,9 +670,15 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); if (!isCondBr) { - if (!isPredicated) + if (!isPredicated) { BBI.NonPredSize++; - else if (!AlreadyPredicated) { + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. // Mark this block unpredicable for now. 
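The two counters added above separate instruction count from latency: each non-branch, unpredicated instruction bumps NonPredSize by one, ExtraCost absorbs any cycles beyond the first reported by getInstrLatency, and ExtraCost2 accumulates the per-instruction predication penalty. A hypothetical four-instruction block (instruction names and latencies invented for illustration) makes the bookkeeping concrete:

    // Hypothetical block of four instructions (latencies from the itinerary):
    //   add  - 1 cycle,  no predication penalty
    //   mul  - 3 cycles, no predication penalty
    //   ldr  - 1 cycle,  predication penalty of 1
    //   mov  - 1 cycle,  no predication penalty
    //
    // After ScanInstructions:
    //   NonPredSize = 4     (one per instruction)
    //   ExtraCost   = 2     (mul contributes 3 - 1)
    //   ExtraCost2  = 1     (ldr's extra cost when predicated)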
@@ -765,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool TNeedSub = TrueBBI.Predicate.size() > 0; bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; + + // Try to predict the branch, using loop info to guide us. + // General heuristics are: + // - backedge -> 90% taken + // - early exit -> 20% taken + // - branch predictor confidence -> 90% + float Prediction = 0.5f; + float Confidence = 0.9f; + MachineLoop *Loop = MLI->getLoopFor(BB); + if (Loop) { + if (TrueBBI.BB == Loop->getHeader()) + Prediction = 0.9f; + else if (FalseBBI.BB == Loop->getHeader()) + Prediction = 0.1f; + + MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); + MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); + if (!TrueLoop || TrueLoop->getParentLoop() == Loop) + Prediction = 0.2f; + else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) + Prediction = 0.8f; + } + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), - *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -783,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -797,15 +859,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Enqueued = true; } - if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && + if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -820,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... 
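The static prediction above treats the edge into a loop header as a hot backedge and an edge that appears to leave the loop as a cold early exit, always reported with a fixed 0.9 confidence. Restated as a standalone helper, with booleans standing in for the MachineLoopInfo queries, the heuristic is roughly:

    // Schematic restatement of the taken-probability heuristic above; the
    // booleans approximate the loop-structure queries made in AnalyzeBlock and
    // are not the exact conditions used there.
    static float predictTakenProbability(bool inLoop,
                                         bool trueSuccIsLoopHeader,
                                         bool falseSuccIsLoopHeader,
                                         bool trueSuccLooksLikeExit,
                                         bool falseSuccLooksLikeExit) {
      float Prediction = 0.5f;                         // no information
      if (inLoop) {
        if (trueSuccIsLoopHeader)        Prediction = 0.9f;  // backedge: taken
        else if (falseSuccIsLoopHeader)  Prediction = 0.1f;

        if (trueSuccLooksLikeExit)       Prediction = 0.2f;  // early exit: cold
        else if (falseSuccLooksLikeExit) Prediction = 0.8f;
      }
      return Prediction;  // the caller above pairs this with a 0.9f confidence
    }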
- if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } - if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, + 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } - if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && + if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1365,6 +1437,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { @@ -1438,7 +1515,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { FromBBI.Predicate.clear(); ToBBI.NonPredSize += FromBBI.NonPredSize; + ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; FromBBI.NonPredSize = 0; + FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.HasFallThrough = FromBBI.HasFallThrough; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index b965bfdcf3b8..a1bd972d38e2 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -12,28 +12,34 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spiller" +#define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "SplitKit.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> +VerifySpills("verify-spills", cl::desc("Verify after each spill/split")); + namespace { class InlineSpiller : public Spiller { MachineFunctionPass &pass_; MachineFunction &mf_; LiveIntervals &lis_; - MachineLoopInfo &loops_; + LiveStacks &lss_; + AliasAnalysis *aa_; 
VirtRegMap &vrm_; MachineFrameInfo &mfi_; MachineRegisterInfo &mri_; @@ -41,19 +47,12 @@ class InlineSpiller : public Spiller { const TargetRegisterInfo &tri_; const BitVector reserved_; - SplitAnalysis splitAnalysis_; - // Variables that are valid during spill(), but used by multiple methods. - LiveInterval *li_; - SmallVectorImpl<LiveInterval*> *newIntervals_; + LiveRangeEdit *edit_; const TargetRegisterClass *rc_; int stackSlot_; - const SmallVectorImpl<LiveInterval*> *spillIs_; - // Values of the current interval that can potentially remat. - SmallPtrSet<VNInfo*, 8> reMattable_; - - // Values in reMattable_ that failed to remat at some point. + // Values that failed to remat at some point. SmallPtrSet<VNInfo*, 8> usedValues_; ~InlineSpiller() {} @@ -65,30 +64,29 @@ public: : pass_(pass), mf_(mf), lis_(pass.getAnalysis<LiveIntervals>()), - loops_(pass.getAnalysis<MachineLoopInfo>()), + lss_(pass.getAnalysis<LiveStacks>()), + aa_(&pass.getAnalysis<AliasAnalysis>()), vrm_(vrm), mfi_(*mf.getFrameInfo()), mri_(mf.getRegInfo()), tii_(*mf.getTarget().getInstrInfo()), tri_(*mf.getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)), - splitAnalysis_(mf, lis_, loops_) {} + reserved_(tri_.getReservedRegs(mf_)) {} void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs); + const SmallVectorImpl<LiveInterval*> &spillIs); -private: - bool split(); + void spill(LiveRangeEdit &); - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx); +private: bool reMaterializeFor(MachineBasicBlock::iterator MI); void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI); bool foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops); + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI = 0); void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); }; @@ -98,106 +96,41 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { + if (VerifySpills) + mf.verify(&pass, "When creating inline spiller"); return new InlineSpiller(pass, mf, vrm); } } -/// split - try splitting the current interval into pieces that may allocate -/// separately. Return true if successful. -bool InlineSpiller::split() { - splitAnalysis_.analyze(li_); - - if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) { - // We can split, but li_ may be left intact with fewer uses. - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitAroundLoop(loop)) - return true; - } - - // Try splitting into single block intervals. - SplitAnalysis::BlockPtrSet blocks; - if (splitAnalysis_.getMultiUseBlocks(blocks)) { - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitSingleBlocks(blocks)) - return true; - } - - // Try splitting inside a basic block. - if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) { - if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_) - .splitInsideBlock(MBB)) - return true; - } - - // We may have been able to split out some uses, but the original interval is - // intact, and it should still be spilled. - return false; -} - -/// allUsesAvailableAt - Return true if all registers used by OrigMI at -/// OrigIdx are also available with the same value at UseIdx. 
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, - SlotIndex OrigIdx, - SlotIndex UseIdx) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); - for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) - continue; - // Reserved registers are OK. - if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) - continue; - // We don't want to move any defs. - if (MO.isDef()) - return false; - // We cannot depend on virtual registers in spillIs_. They will be spilled. - for (unsigned si = 0, se = spillIs_->size(); si != se; ++si) - if ((*spillIs_)[si]->reg == MO.getReg()) - return false; - - LiveInterval &LI = lis_.getInterval(MO.getReg()); - const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx); - if (!OVNI) - continue; - if (OVNI != LI.getVNInfoAt(UseIdx)) - return false; - } - return true; -} - -/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of +/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of /// reloading it. bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); - VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx); + VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); + if (!OrigVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) + if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) MO.setIsUndef(); } DEBUG(dbgs() << UseIdx << '\t' << *MI); return true; } - if (!reMattable_.count(OrigVNI)) { - DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": " - << UseIdx << '\t' << *MI); - return false; - } - MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def); - if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) { + + LiveRangeEdit::Remat RM(OrigVNI); + if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) { usedValues_.insert(OrigVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; } - // If the instruction also writes li_->reg, it had better not require the same - // register for uses and defs. + // If the instruction also writes edit_->getReg(), it had better not require + // the same register for uses and defs. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops); if (Writes) { for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); @@ -209,62 +142,57 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { } } + // Before rematerializing into a register for a single instruction, try to + // fold a load into the instruction. That avoids allocating a new register. + if (RM.OrigMI->getDesc().canFoldAsLoad() && + foldMemoryOperand(MI, Ops, RM.OrigMI)) { + edit_->markRematerialized(RM.ParentVNI); + return true; + } + // Alocate a new register for the remat. - unsigned NewVReg = mri_.createVirtualRegister(rc_); - vrm_.grow(); - LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_); NewLI.markNotSpillable(); - newIntervals_->push_back(&NewLI); + + // Rematting for a copy: Set allocation hint to be the destination register. 
+ if (MI->isCopy()) + mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg()); // Finally we can rematerialize OrigMI before MI. - MachineBasicBlock &MBB = *MI->getParent(); - tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_); - MachineBasicBlock::iterator RematMI = MI; - SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex(); - DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI); + SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + lis_, tii_, tri_); + DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' + << *lis_.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) { - MO.setReg(NewVReg); + if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + MO.setReg(NewLI.reg); MO.setIsKill(); } } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true, - lis_.getVNInfoAllocator()); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); return true; } -/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible, +/// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { // Do a quick scan of the interval values to find if any are remattable. - reMattable_.clear(); - usedValues_.clear(); - for (LiveInterval::const_vni_iterator I = li_->vni_begin(), - E = li_->vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->isUnused() || !VNI->isDefAccurate()) - continue; - MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); - if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI)) - continue; - reMattable_.insert(VNI); - } - - // Often, no defs are remattable. - if (reMattable_.empty()) + if (!edit_->anyRematerializable(lis_, tii_, aa_)) return; - // Try to remat before all uses of li_->reg. + usedValues_.clear(); + + // Try to remat before all uses of edit_->getReg(). bool anyRemat = false; for (MachineRegisterInfo::use_nodbg_iterator - RI = mri_.use_nodbg_begin(li_->reg); + RI = mri_.use_nodbg_begin(edit_->getReg()); MachineInstr *MI = RI.skipInstruction();) anyRemat |= reMaterializeFor(MI); @@ -273,33 +201,35 @@ void InlineSpiller::reMaterializeAll() { // Remove any values that were completely rematted. bool anyRemoved = false; - for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(), - E = reMattable_.end(); I != E; ++I) { + for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), + E = edit_->getParent().vni_end(); I != E; ++I) { VNInfo *VNI = *I; - if (VNI->hasPHIKill() || usedValues_.count(VNI)) + if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || + usedValues_.count(VNI)) continue; MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); lis_.RemoveMachineInstrFromMaps(DefMI); vrm_.RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); - VNI->setIsDefAccurate(false); + VNI->def = SlotIndex(); anyRemoved = true; } if (!anyRemoved) return; - // Removing values may cause debug uses where li_ is not live. - for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + // Removing values may cause debug uses where parent is not live. 
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); MachineInstr *MI = RI.skipInstruction();) { if (!MI->isDebugValue()) continue; - // Try to preserve the debug value if li_ is live immediately after it. + // Try to preserve the debug value if parent is live immediately after it. MachineBasicBlock::iterator NextMI = MI; ++NextMI; if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI)); + SlotIndex Idx = lis_.getInstructionIndex(NextMI); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) continue; } @@ -317,7 +247,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { return false; // We have a stack access. Is it the right register and slot? - if (reg != li_->reg || FI != stackSlot_) + if (reg != edit_->getReg() || FI != stackSlot_) return false; DEBUG(dbgs() << "Coalescing stack access: " << *MI); @@ -327,9 +257,13 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { } /// foldMemoryOperand - Try folding stack slot references in Ops into MI. -/// Return true on success, and MI will be erased. +/// @param MI Instruction using or defining the current register. +/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// @param LoadMI Load instruction to use instead of stack slot when non-null. +/// @return True on success, and MI will be erased. bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl<unsigned> &Ops) { + const SmallVectorImpl<unsigned> &Ops, + MachineInstr *LoadMI) { // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; @@ -341,16 +275,22 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; + // We cannot fold a load instruction into a def. + if (LoadMI && MO.isDef()) + return false; // Tied use operands should not be passed to foldMemoryOperand. if (!MI->isRegTiedToDefOperand(Idx)) FoldOps.push_back(Idx); } - MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + MachineInstr *FoldMI = + LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) + : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); if (!FoldMI) return false; lis_.ReplaceMachineInstrInMaps(MI, FoldMI); - vrm_.addSpillSlotUse(stackSlot_, FoldMI); + if (!LoadMI) + vrm_.addSpillSlotUse(stackSlot_, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); return true; @@ -366,7 +306,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); vrm_.addSpillSlotUse(stackSlot_, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); } @@ -375,44 +315,58 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, void InlineSpiller::insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); + + // Get the defined value. It could be an early clobber so keep the def index. 
SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); + assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); + Idx = VNI->def; + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); --MI; // Point to store instruction. SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); vrm_.addSpillSlotUse(stackSlot_, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, - lis_.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); } void InlineSpiller::spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { - DEBUG(dbgs() << "Inline spilling " << *li << "\n"); - assert(li->isSpillable() && "Attempting to spill already spilled value."); - assert(!li->isStackSlot() && "Trying to spill a stack slot."); - - li_ = li; - newIntervals_ = &newIntervals; - rc_ = mri_.getRegClass(li->reg); - spillIs_ = &spillIs; + const SmallVectorImpl<LiveInterval*> &spillIs) { + LiveRangeEdit edit(*li, newIntervals, spillIs); + spill(edit); + if (VerifySpills) + mf_.verify(&pass_, "After inline spill"); +} - if (split()) - return; +void InlineSpiller::spill(LiveRangeEdit &edit) { + edit_ = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + DEBUG(dbgs() << "Inline spilling " + << mri_.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\n"); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); reMaterializeAll(); // Remat may handle everything. - if (li_->empty()) + if (edit_->getParent().empty()) return; - stackSlot_ = vrm_.getStackSlot(li->reg); - if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) - stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + rc_ = mri_.getRegClass(edit.getReg()); + stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg()); + + // Update LiveStacks now that we are committed to spilling. + LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); + assert(stacklvr.empty() && "Just created stack slot not empty"); + stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); + stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); // Iterate over instructions using register. - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); MachineInstr *MI = RI.skipInstruction();) { // Debug values are not allowed to affect codegen. @@ -440,7 +394,7 @@ void InlineSpiller::spill(LiveInterval *li, // Analyze instruction. bool Reads, Writes; SmallVector<unsigned, 8> Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); // Attempt to fold memory ops. if (foldMemoryOperand(MI, Ops)) @@ -448,9 +402,7 @@ void InlineSpiller::spill(LiveInterval *li, // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. 
- unsigned NewVReg = mri_.createVirtualRegister(rc_); - vrm_.grow(); - LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); NewLI.markNotSpillable(); if (Reads) @@ -460,7 +412,7 @@ void InlineSpiller::spill(LiveInterval *li, bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - MO.setReg(NewVReg); + MO.setReg(NewLI.reg); if (MO.isUse()) { if (!MI->isRegTiedToDefOperand(Ops[i])) MO.setIsKill(); @@ -475,6 +427,5 @@ void InlineSpiller::spill(LiveInterval *li, insertSpill(NewLI, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); - newIntervals.push_back(&NewLI); } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 3852ebaf6425..3861ddadf655 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -85,9 +85,11 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif void IntrinsicLowering::AddPrototypes(Module &M) { @@ -536,3 +538,27 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { "Lowering should have eliminated any uses of the intrinsic call!"); CI->eraseFromParent(); } + +bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { + // Verify this is a simple bswap. + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || + !CI->getType()->isIntegerTy()) + return false; + + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty) + return false; + + // Okay, we can do this xform, do so now. + const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getArgOperand(0); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 36038027b259..80dfc763af69 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -20,9 +20,11 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" @@ -30,6 +32,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/StandardPasses.h" using namespace llvm; namespace llvm { @@ -140,13 +143,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // Create a code emitter if asked to show the encoding. 
MCCodeEmitter *MCE = 0; - if (ShowMCEncoding) + TargetAsmBackend *TAB = 0; + if (ShowMCEncoding) { MCE = getTarget().createCodeEmitter(*this, *Context); - - AsmStreamer.reset(createAsmStreamer(*Context, Out, - getTargetData()->isLittleEndian(), - getVerboseAsm(), InstPrinter, - MCE, ShowMCInst)); + TAB = getTarget().createAsmBackend(TargetTriple); + } + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + getVerboseAsm(), + hasMCUseLoc(), + InstPrinter, + MCE, TAB, + ShowMCInst); + AsmStreamer.reset(S); break; } case CGFT_ObjectFile: { @@ -159,7 +168,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, *TAB, Out, MCE, - hasMCRelaxAll())); + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); break; } case CGFT_Null: @@ -241,7 +252,7 @@ static void printAndVerify(PassManagerBase &PM, PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); if (VerifyMachineCode) - PM.add(createMachineVerifierPass()); + PM.add(createMachineVerifierPass(Banner)); } /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both @@ -253,6 +264,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, MCContext *&OutContext) { // Standard LLVM-Level Passes. + // Basic AliasAnalysis support. + createStandardAliasAnalysisPasses(&PM); + // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. if (!DisableVerify) @@ -288,7 +302,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // edge from elsewhere. PM.add(createSjLjEHPass(getTargetLowering())); // FALLTHROUGH - case ExceptionHandling::Dwarf: + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::DwarfTable: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: @@ -320,7 +335,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo()); + TargetAsmInfo *TAI = new TargetAsmInfo(*this); + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI); PM.add(MMI); OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. @@ -339,6 +355,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Print the instruction selected machine code... printAndVerify(PM, "After Instruction Selection"); + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. if (OptLevel != CodeGenOpt::None) @@ -356,13 +375,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createDeadMachineInstructionElimPass()); printAndVerify(PM, "After codegen DCE pass"); - PM.add(createPeepholeOptimizerPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); } // Pre-ra tail duplication. 
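The IfConversion.cpp hunks earlier in this diff thread a static branch prediction and a confidence value into every profitability check, alongside the new ExtraCost/ExtraCost2 per-block fields (extra execution cycles and extra predication cost). Below is a minimal sketch of the kind of expected-cycle comparison a target hook can make from those inputs; the function name, the misprediction penalty, and the exact formula are illustrative assumptions, not the patch's MeetIfcvtSizeLimit or any target's isProfitableToIfCvt implementation.

// Illustrative expected-cost comparison using the new inputs.  TCycles and
// FCycles stand for NonPredSize + ExtraCost of each side, TExtra and FExtra
// for the accumulated ExtraCost2 (per-instruction predication overhead).
static bool worthIfConverting(unsigned TCycles, unsigned TExtra,
                              unsigned FCycles, unsigned FExtra,
                              float Prediction, float Confidence) {
  const unsigned MispredictPenalty = 10; // hypothetical, target-dependent

  // Keeping the branch: each side's cost weighted by how often it runs,
  // plus a misprediction cost weighted by how unsure the predictor is.
  float BranchCost = Prediction * TCycles + (1.0f - Prediction) * FCycles +
                     (1.0f - Confidence) * MispredictPenalty;

  // After if-conversion both sides always execute, and every predicated
  // instruction pays its extra predication cost.
  float PredicatedCost = float(TCycles + TExtra + FCycles + FExtra);

  return PredicatedCost <= BranchCost;
}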
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index b9527fafbee8..0eb009ddac29 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { @@ -35,14 +36,14 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { unsigned RHSLatency = PQ->getLatency(RHSNum); if (LHSLatency < RHSLatency) return true; if (LHSLatency > RHSLatency) return false; - + // After that, if two nodes have identical latencies, look to see if one will // unblock more other nodes than the other. unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); if (LHSBlocked < RHSBlocked) return true; if (LHSBlocked > RHSBlocked) return false; - + // Finally, just to provide a stable ordering, use the node number as a // deciding factor. return LHSNum < RHSNum; @@ -64,7 +65,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { OnlyAvailablePred = &Pred; } } - + return OnlyAvailablePred; } @@ -78,7 +79,7 @@ void LatencyPriorityQueue::push(SUnit *SU) { ++NumNodesBlocking; } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; - + Queue.push_back(SU); } @@ -102,10 +103,10 @@ void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { /// node of the same priority that will not make a node available. void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. - + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; - + // Okay, we found a single predecessor that is available, but not scheduled. // Since it is available, it must be in the priority queue. First remove it. remove(OnlyAvailablePred); @@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } + +#ifdef NDEBUG +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + LatencyPriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp new file mode 100644 index 000000000000..853ec1ac7c13 --- /dev/null +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -0,0 +1,711 @@ +//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveDebugVariables analysis. +// +// Remove all DBG_VALUE instructions referencing virtual registers and replace +// them with a data structure tracking where live user variables are kept - in a +// virtual register or in a stack slot. +// +// Allow the data structure to be updated during register allocation when values +// are moved between registers and stack slots. Finally emit new DBG_VALUE +// instructions after register allocation is complete. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "livedebug" +#include "LiveDebugVariables.h" +#include "VirtRegMap.h" +#include "llvm/Constants.h" +#include "llvm/Metadata.h" +#include "llvm/Value.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableLDV("live-debug-variables", cl::init(true), + cl::desc("Enable the live debug variables pass"), cl::Hidden); + +char LiveDebugVariables::ID = 0; + +INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) + +void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); +} + +/// LocMap - Map of where a user value is live, and its location. +typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; + +/// UserValue - A user value is a part of a debug info user variable. +/// +/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register +/// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. +namespace { +class UserValue { + const MDNode *variable; ///< The debug info variable we are part of. + unsigned offset; ///< Byte offset into variable. + DebugLoc dl; ///< The debug location for the variable. This is + ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next; ///< Next value in equivalence class, or null. + + /// Numbered locations referenced by locmap. + SmallVector<MachineOperand, 4> locations; + + /// Map of slot indices where this value is live. + LocMap locInts; + + /// coalesceLocation - After LocNo was changed, check if it has become + /// identical to another location, and coalesce them. This may cause LocNo or + /// a later location to be erased, but no earlier location will be erased. + void coalesceLocation(unsigned LocNo); + + /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, + LiveIntervals &LIS, const TargetInstrInfo &TII); + + /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. + void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, const TargetInstrInfo &TII); + +public: + /// UserValue - Create a new UserValue. 
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg() && LocMO.getReg() == 0)
+ return ~0u;
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ return locations.size() - 1;
+ }
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+ /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+ /// variable may have more than one corresponding DBG_VALUE instruction.
+ /// Only the first one needs a DebugLoc to identify the variable's lexical
+ /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+ /// @param Idx Last valid SlotIndex before instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ OS << " Loc" << i << '=' << locations[i];
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo; + if (KeepLoc > EraseLoc) + std::swap(KeepLoc, EraseLoc); + locations.erase(locations.begin() + EraseLoc); + + // Rewrite values. + for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + unsigned v = I.value(); + if (v == EraseLoc) + I.setValue(KeepLoc); // Coalesce when possible. + else if (v > EraseLoc) + I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values. + } +} + +UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, + DebugLoc DL) { + UserValue *&Leader = userVarMap[Var]; + if (Leader) { + UserValue *UV = Leader->getLeader(); + Leader = UV; + for (; UV; UV = UV->getNext()) + if (UV->match(Var, Offset)) + return UV; + } + + UserValue *UV = new UserValue(Var, Offset, DL, allocator); + userValues.push_back(UV); + Leader = UserValue::merge(Leader, UV); + return UV; +} + +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs"); + UserValue *&Leader = virtRegToEqClass[VirtReg]; + Leader = UserValue::merge(Leader, EC); +} + +UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { + if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) + return UV->getLeader(); + return 0; +} + +bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { + // DBG_VALUE loc, offset, variable + if (MI->getNumOperands() != 3 || + !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + DEBUG(dbgs() << "Can't handle " << *MI); + return false; + } + + // Get or create the UserValue for (variable,offset). + unsigned Offset = MI->getOperand(1).getImm(); + const MDNode *Var = MI->getOperand(2).getMetadata(); + UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + + // If the location is a virtual register, make sure it is mapped. + if (MI->getOperand(0).isReg()) { + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + mapVirtReg(Reg, UV); + } + + UV->addDef(Idx, MI->getOperand(0)); + return true; +} + +bool LDVImpl::collectDebugValues(MachineFunction &mf) { + bool Changed = false; + for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; + ++MFI) { + MachineBasicBlock *MBB = MFI; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE;) { + if (!MBBI->isDebugValue()) { + ++MBBI; + continue; + } + // DBG_VALUE has no slot index, use the previous instruction instead. + SlotIndex Idx = MBBI == MBB->begin() ? + LIS->getMBBStartIdx(MBB) : + LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + // Handle consecutive DBG_VALUE instructions with the same slot index. + do { + if (handleDebugValue(MBBI, Idx)) { + MBBI = MBB->erase(MBBI); + Changed = true; + } else + ++MBBI; + } while (MBBI != MBBE && MBBI->isDebugValue()); + } + } + return Changed; +} + +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, + LiveInterval *LI, const VNInfo *VNI, + LiveIntervals &LIS, MachineDominatorTree &MDT) { + SmallVector<SlotIndex, 16> Todo; + Todo.push_back(Idx); + + do { + SlotIndex Start = Todo.pop_back_val(); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); + + // Limit to VNI's live range. + bool ToEnd = true; + if (LI && VNI) { + LiveRange *Range = LI->getLiveRangeContaining(Start); + if (!Range || Range->valno != VNI) + continue; + if (Range->end < Stop) + Stop = Range->end, ToEnd = false; + } + + // There could already be a short def at Start. 
+ if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + continue; + // This is a one-slot placeholder. Just skip it. + ++I; + } + + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + + if (Start >= Stop) + continue; + + I.insert(Start, Stop, LocNo); + + // If we extended to the MBB end, propagate down the dominator tree. + if (!ToEnd) + continue; + const std::vector<MachineDomTreeNode*> &Children = + MDT.getNode(MBB)->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock())); + } while (!Todo.empty()); +} + +void +UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { + SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; + + // Collect all defs to be extended (Skipping undefs). + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) + if (I.value() != ~0u) + Defs.push_back(std::make_pair(I.start(), I.value())); + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + SlotIndex Idx = Defs[i].first; + unsigned LocNo = Defs[i].second; + const MachineOperand &Loc = locations[LocNo]; + + // Register locations are constrained to where the register value is live. + if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { + LiveInterval *LI = &LIS.getInterval(Loc.getReg()); + const VNInfo *VNI = LI->getVNInfoAt(Idx); + extendDef(Idx, LocNo, LI, VNI, LIS, MDT); + } else + extendDef(Idx, LocNo, 0, 0, LIS, MDT); + } + + // Finally, erase all the undefs. + for (LocMap::iterator I = locInts.begin(); I.valid();) + if (I.value() == ~0u) + I.erase(); + else + ++I; +} + +void LDVImpl::computeIntervals() { + for (unsigned i = 0, e = userValues.size(); i != e; ++i) + userValues[i]->computeIntervals(*LIS, *MDT); +} + +bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + LIS = &pass.getAnalysis<LiveIntervals>(); + MDT = &pass.getAnalysis<MachineDominatorTree>(); + TRI = mf.getTarget().getRegisterInfo(); + clear(); + DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " + << ((Value*)mf.getFunction())->getName() + << " **********\n"); + + bool Changed = collectDebugValues(mf); + computeIntervals(); + DEBUG(print(dbgs())); + return Changed; +} + +bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { + if (!EnableLDV) + return false; + if (!pImpl) + pImpl = new LDVImpl(this); + return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); +} + +void LiveDebugVariables::releaseMemory() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->clear(); +} + +LiveDebugVariables::~LiveDebugVariables() { + if (pImpl) + delete static_cast<LDVImpl*>(pImpl); +} + +void UserValue:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, + const TargetRegisterInfo *TRI) { + for (unsigned i = locations.size(); i; --i) { + unsigned LocNo = i - 1; + MachineOperand &Loc = locations[LocNo]; + if (!Loc.isReg() || Loc.getReg() != OldReg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + Loc.substPhysReg(NewReg, *TRI); + else + Loc.substVirtReg(NewReg, SubIdx, *TRI); + coalesceLocation(LocNo); + } +} + +void LDVImpl:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { + UserValue *UV = lookupVirtReg(OldReg); + if (!UV) + return; + + if (TargetRegisterInfo::isVirtualRegister(NewReg)) + mapVirtReg(NewReg, UV); + virtRegToEqClass.erase(OldReg); + + do { + 
UV->renameRegister(OldReg, NewReg, SubIdx, TRI); + UV = UV->getNext(); + } while (UV); +} + +void LiveDebugVariables:: +renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx); +} + +void +UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { + // Iterate over locations in reverse makes it easier to handle coalescing. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + MachineOperand &Loc = locations[LocNo]; + // Only virtual registers are rewritten. + if (!Loc.isReg() || !Loc.getReg() || + !TargetRegisterInfo::isVirtualRegister(Loc.getReg())) + continue; + unsigned VirtReg = Loc.getReg(); + if (VRM.isAssignedReg(VirtReg) && + TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && + VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + // FIXME: Translate SubIdx to a stackslot offset. + Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); + } else { + Loc.setReg(0); + Loc.setSubReg(0); + } + coalesceLocation(LocNo); + } + DEBUG(print(dbgs(), &TRI)); +} + +/// findInsertLocation - Find an iterator for inserting a DBG_VALUE +/// instruction. +static MachineBasicBlock::iterator +findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS) { + SlotIndex Start = LIS.getMBBStartIdx(MBB); + Idx = Idx.getBaseIndex(); + + // Try to find an insert location by going backwards from Idx. + MachineInstr *MI; + while (!(MI = LIS.getInstructionFromIndex(Idx))) { + // We've reached the beginning of MBB. + if (Idx == Start) { + MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + return I; + } + Idx = Idx.getPrevIndex(); + } + + // Don't insert anything after the first terminator, though. + return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() : + llvm::next(MachineBasicBlock::iterator(MI)); +} + +DebugLoc UserValue::findDebugLoc() { + DebugLoc D = dl; + dl = DebugLoc(); + return D; +} +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, + unsigned LocNo, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + MachineOperand &Loc = locations[LocNo]; + + // Frame index locations may require a target callback. + if (Loc.isFI()) { + MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), + Loc.getIndex(), offset, variable, + findDebugLoc()); + if (MI) { + MBB->insert(I, MI); + return; + } + } + // This is not a frame index, or the target is happy with a standard FI. 
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); +} + +void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0) + .addImm(offset).addMetadata(variable); +} + +void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); + + for (LocMap::const_iterator I = locInts.begin(); I.valid();) { + SlotIndex Start = I.start(); + SlotIndex Stop = I.stop(); + unsigned LocNo = I.value(); + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + + // This interval may span multiple basic blocks. + // Insert a DBG_VALUE into each one. + while(Stop > MBBEnd) { + // Move to the next block. + Start = MBBEnd; + if (++MBB == MFEnd) + break; + MBBEnd = LIS.getMBBEndIdx(MBB); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(MBB, Start, LocNo, LIS, TII); + } + DEBUG(dbgs() << '\n'); + if (MBB == MFEnd) + break; + + ++I; + if (Stop == MBBEnd) + continue; + // The current interval ends before MBB. + // Insert a kill if there is a gap. + if (!I.valid() || I.start() > Stop) + insertDebugKill(MBB, Stop, LIS, TII); + } +} + +void LDVImpl::emitDebugValues(VirtRegMap *VRM) { + DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + userValues[i]->rewriteLocations(*VRM, *TRI); + userValues[i]->emitDebugValues(VRM, *LIS, *TII); + } +} + +void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); +} + + +#ifndef NDEBUG +void LiveDebugVariables::dump() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->print(dbgs()); +} +#endif + diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h new file mode 100644 index 000000000000..a6e40a198456 --- /dev/null +++ b/lib/CodeGen/LiveDebugVariables.h @@ -0,0 +1,63 @@ +//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface to the LiveDebugVariables analysis. +// +// The analysis removes DBG_VALUE instructions for virtual registers and tracks +// live user variables in a data structure that can be updated during register +// allocation. +// +// After register allocation new DBG_VALUE instructions are emitted to reflect +// the new locations of user variables. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class VirtRegMap; + +class LiveDebugVariables : public MachineFunctionPass { + void *pImpl; +public: + static char ID; // Pass identification, replacement for typeid + + LiveDebugVariables(); + ~LiveDebugVariables(); + + /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. + /// @param OldReg Old virtual register that is going away. + /// @param NewReg New register holding the user variables. + /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub- + /// register. + void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + + /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes + /// that happened during register allocation. + /// @param VRM Rename virtual registers according to map. + void emitDebugValues(VirtRegMap *VRM); + + /// dump - Print data structures to dbgs(). + void dump(); + +private: + + virtual bool runOnMachineFunction(MachineFunction &); + virtual void releaseMemory(); + virtual void getAnalysisUsage(AnalysisUsage &) const; + +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 59f380ad2641..c2dbd6ab75a1 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -30,58 +30,19 @@ #include <algorithm> using namespace llvm; -// An example for liveAt(): -// -// this = [1,4), liveAt(0) will return false. The instruction defining this -// spans slots [0,3]. The interval belongs to an spilled definition of the -// variable it represents. This is because slot 1 is used (def slot) and spans -// up to slot 3 (store slot). -// -bool LiveInterval::liveAt(SlotIndex I) const { - Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); - - if (r == ranges.begin()) - return false; - - --r; - return r->contains(I); -} - -// liveBeforeAndAt - Check if the interval is live at the index and the index -// just before it. If index is liveAt, check if it starts a new live range. -// If it does, then check if the previous live range ends at index-1. -bool LiveInterval::liveBeforeAndAt(SlotIndex I) const { - Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); - - if (r == ranges.begin()) - return false; - - --r; - if (!r->contains(I)) - return false; - if (I != r->start) - return true; - // I is the start of a live range. Check if the previous live range ends - // at I-1. - if (r == ranges.begin()) - return false; - return r->end == I; +// CompEnd - Compare LiveRange ends. +namespace { +struct CompEnd { + bool operator()(const LiveRange &A, const LiveRange &B) const { + return A.end < B.end; + } +}; } -/// killedAt - Return true if a live range ends at index. Note that the kill -/// point is not contained in the half-open live range. It is usually the -/// getDefIndex() slot following its last use. -bool LiveInterval::killedAt(SlotIndex I) const { - Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); - - // Now r points to the first interval with start >= I, or ranges.end(). - if (r == ranges.begin()) - return false; - - --r; - // Now r points to the last interval with end <= I. - // r->end is the kill point. 
- return r->end == I; +LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { + assert(Pos.isValid() && "Cannot search for an invalid index"); + return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0), + CompEnd()); } /// killedInRange - Return true if the interval has kills in [Start,End). @@ -330,25 +291,14 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in -/// a single LiveRange of the live interval. -bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) { - Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); - if (I == ranges.begin()) - return false; - --I; - return I->containsRange(Start, End); -} - /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, bool RemoveDeadValNo) { // Find the LiveRange containing this span. - Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); - assert(I != ranges.begin() && "Range is not in interval!"); - --I; + Ranges::iterator I = find(Start); + assert(I != ranges.end() && "Range is not in interval!"); assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); // If the span we are removing is at the start of the LiveRange, adjust it. @@ -405,32 +355,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { markValNoForDeletion(ValNo); } -/// getLiveRangeContaining - Return the live range that contains the -/// specified index, or null if there is none. -LiveInterval::const_iterator -LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const { - const_iterator It = std::upper_bound(begin(), end(), Idx); - if (It != ranges.begin()) { - --It; - if (It->contains(Idx)) - return It; - } - - return end(); -} - -LiveInterval::iterator -LiveInterval::FindLiveRangeContaining(SlotIndex Idx) { - iterator It = std::upper_bound(begin(), end(), Idx); - if (It != begin()) { - --It; - if (It->contains(Idx)) - return It; - } - - return end(); -} - /// findDefinedVNInfo - Find the VNInfo defined by the specified /// index (register interval). VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const { @@ -443,17 +367,6 @@ VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const { return 0; } -/// findDefinedVNInfo - Find the VNInfo defined by the specified -/// register (stack inteval). -VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { - for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) { - if ((*i)->getReg() == reg) - return *i; - } - return 0; -} - /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. @@ -616,103 +529,6 @@ void LiveInterval::MergeValueInAsValue( } -/// MergeInClobberRanges - For any live ranges that are not defined in the -/// current interval, but are defined in the Clobbers interval, mark them -/// used with an unknown definition value. 
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_, - const LiveInterval &Clobbers, - VNInfo::Allocator &VNInfoAllocator) { - if (Clobbers.empty()) return; - - DenseMap<VNInfo*, VNInfo*> ValNoMaps; - VNInfo *UnusedValNo = 0; - iterator IP = begin(); - for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) { - // For every val# in the Clobbers interval, create a new "unknown" val#. - VNInfo *ClobberValNo = 0; - DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno); - if (VI != ValNoMaps.end()) - ClobberValNo = VI->second; - else if (UnusedValNo) - ClobberValNo = UnusedValNo; - else { - UnusedValNo = ClobberValNo = - getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); - } - - bool Done = false; - SlotIndex Start = I->start, End = I->end; - // If a clobber range starts before an existing range and ends after - // it, the clobber range will need to be split into multiple ranges. - // Loop until the entire clobber range is handled. - while (!Done) { - Done = true; - IP = std::upper_bound(IP, end(), Start); - SlotIndex SubRangeStart = Start; - SlotIndex SubRangeEnd = End; - - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > SubRangeStart) { - SubRangeStart = IP[-1].end; - // Trimmed away the whole range? - if (SubRangeStart >= SubRangeEnd) continue; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && SubRangeEnd > IP->start) { - // If the clobber live range extends beyond the existing live range, - // it'll need at least another live range, so set the flag to keep - // iterating. - if (SubRangeEnd > IP->end) { - Start = IP->end; - Done = false; - } - SubRangeEnd = IP->start; - // If this trimmed away the whole range, ignore it. - if (SubRangeStart == SubRangeEnd) continue; - } - - // Insert the clobber interval. - IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo), - IP); - UnusedValNo = 0; - } - } - - if (UnusedValNo) { - // Delete the last unused val#. - valnos.pop_back(); - } -} - -void LiveInterval::MergeInClobberRange(LiveIntervals &li_, - SlotIndex Start, - SlotIndex End, - VNInfo::Allocator &VNInfoAllocator) { - // Find a value # to use for the clobber ranges. If there is already a value# - // for unknown values, use it. - VNInfo *ClobberValNo = - getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator); - - iterator IP = begin(); - IP = std::upper_bound(IP, end(), Start); - - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) return; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) return; - } - - // Insert the clobber interval. - addRangeFrom(LiveRange(Start, End, ClobberValNo), IP); -} /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all @@ -767,6 +583,9 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } + // Merge the relevant flags. + V2->mergeFlags(V1); + // Now that V1 is dead, remove it. 
markValNoForDeletion(V1); @@ -831,14 +650,9 @@ void LiveRange::dump() const { } void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { - if (isStackSlot()) - OS << "SS#" << getStackSlotIndex(); - else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) - OS << TRI->getName(reg); - else - OS << "%reg" << reg; - - OS << ',' << weight; + OS << PrintReg(reg, TRI); + if (weight != 0) + OS << ',' << weight; if (empty()) OS << " EMPTY"; @@ -863,10 +677,9 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (vni->isUnused()) { OS << "x"; } else { - if (!vni->isDefAccurate() && !vni->isPHIDef()) - OS << "?"; - else - OS << vni->def; + OS << vni->def; + if (vni->isPHIDef()) + OS << "-phidef"; if (vni->hasPHIKill()) OS << "-phikill"; if (vni->hasRedefByEC()) @@ -884,3 +697,84 @@ void LiveInterval::dump() const { void LiveRange::print(raw_ostream &os) const { os << *this; } + +unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { + // Create initial equivalence classes. + eqClass_.clear(); + eqClass_.grow(LI->getNumValNums()); + + const VNInfo *used = 0, *unused = 0; + + // Determine connections. + for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + // Group all unused values into one class. + if (VNI->isUnused()) { + if (unused) + eqClass_.join(unused->id, VNI->id); + unused = VNI; + continue; + } + used = VNI; + if (VNI->isPHIDef()) { + const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + assert(MBB && "Phi-def has no defining MBB"); + // Connect to values live out of predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) + if (const VNInfo *PVNI = + LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) + eqClass_.join(VNI->id, PVNI->id); + } else { + // Normal value defined by an instruction. Check for two-addr redef. + // FIXME: This could be coincidental. Should we really check for a tied + // operand constraint? + // Note that VNI->def may be a use slot for an early clobber def. + if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + eqClass_.join(VNI->id, UVNI->id); + } + } + + // Lump all the unused values in with the last used value. + if (used && unused) + eqClass_.join(used->id, unused->id); + + eqClass_.compress(); + return eqClass_.getNumClasses(); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { + assert(LIV[0] && "LIV[0] must be set"); + LiveInterval &LI = *LIV[0]; + + // First move runs to new intervals. + LiveInterval::iterator J = LI.begin(), E = LI.end(); + while (J != E && eqClass_[J->valno->id] == 0) + ++J; + for (LiveInterval::iterator I = J; I != E; ++I) { + if (unsigned eq = eqClass_[I->valno->id]) { + assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && + "New intervals should be empty"); + LIV[eq]->ranges.push_back(*I); + } else + *J++ = *I; + } + LI.ranges.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. 
+ unsigned j = 0, e = LI.getNumValNums(); + while (j != e && eqClass_[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LI.getValNumInfo(i); + if (unsigned eq = eqClass_[i]) { + VNI->id = LIV[eq]->getNumValNums(); + LIV[eq]->valnos.push_back(VNI); + } else { + VNI->id = j; + LI.valnos[j++] = VNI; + } + } + LI.valnos.resize(j); +} diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 2726fc337539..aef5b5f77e78 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -20,6 +20,7 @@ #include "VirtRegMap.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,8 +56,17 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; -INITIALIZE_PASS(LiveIntervals, "liveintervals", - "Live Interval Analysis", false, false); +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -132,19 +142,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { void LiveIntervals::printInstrs(raw_ostream &OS) const { OS << "********** MACHINEINSTRS **********\n"; - - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - OS << "BB#" << mbbi->getNumber() - << ":\t\t# derived from " << mbbi->getName() << "\n"; - for (MachineBasicBlock::iterator mii = mbbi->begin(), - mie = mbbi->end(); mii != mie; ++mii) { - if (mii->isDebugValue()) - OS << " \t" << *mii; - else - OS << getInstructionIndex(mii) << '\t' << *mii; - } - } + mf_->print(OS, indexes_); } void LiveIntervals::dumpInstrs() const { @@ -248,15 +246,6 @@ bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, return false; } -#ifndef NDEBUG -static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { - if (TargetRegisterInfo::isPhysicalRegister(reg)) - dbgs() << tri_->getName(reg); - else - dbgs() << "%reg" << reg; -} -#endif - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -285,8 +274,8 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, SlotIndex RedefIndex = MIIdx.getDefIndex(); const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getUseIndex()); - if (OldLR->valno->isDefAccurate()) { - MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); + if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; } return false; @@ -298,10 +287,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DEBUG({ - dbgs() << "\t\tregister: "; - printRegName(interval.reg, tri_); - }); + 
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be @@ -326,8 +312,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, CopyMI = mi; } - VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, - VNInfoAllocator); + VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); assert(ValNo->id == 0 && "First value in interval is not 0?"); // Loop over all of the blocks that the vreg is defined in. There are @@ -393,8 +378,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) if (PHIJoin) { - ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false, - VNInfoAllocator); + assert(getInstructionFromIndex(Start) == 0 && + "PHI def index points at actual instruction."); + ValNo = interval.getNextValue(Start, 0, VNInfoAllocator); ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); @@ -440,10 +426,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. - VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), - false, // update at * - VNInfoAllocator); - ValNo->setFlags(OldValNo->getFlags()); // * <- updating here + VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; @@ -481,7 +464,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; if (mi->isCopyLike()) CopyMI = mi; - ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); + ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator); SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); @@ -504,10 +487,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, MachineInstr *CopyMI) { // A physical register cannot be live across basic block, so its // lifetime must end somewhere in its defining basic block. - DEBUG({ - dbgs() << "\t\tregister: "; - printRegName(interval.reg, tri_); - }); + DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; SlotIndex start = baseIndex.getDefIndex(); @@ -573,11 +553,11 @@ exit: assert(start < end && "did not find end of interval?"); // Already exists? Extend old live interval. - LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start); - bool Extend = OldLR != interval.end(); - VNInfo *ValNo = Extend - ? 
OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator); - if (MO.isEarlyClobber() && Extend) + VNInfo *ValNo = interval.getVNInfoAt(start); + bool Extend = ValNo != 0; + if (!Extend) + ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator); + if (Extend && MO.isEarlyClobber()) ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); @@ -611,10 +591,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex MIIdx, LiveInterval &interval, bool isAlias) { - DEBUG({ - dbgs() << "\t\tlivein register: "; - printRegName(interval.reg, tri_); - }); + DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_)); // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. @@ -672,9 +649,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, } } + SlotIndex defIdx = getMBBStartIdx(MBB); + assert(getInstructionFromIndex(defIdx) == 0 && + "PHI def index points at actual instruction."); VNInfo *vni = - interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true), - 0, false, VNInfoAllocator); + interval.getNextValue(defIdx, 0, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); @@ -764,10 +743,177 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { return NewLI; } +/// shrinkToUses - After removing some uses of a register, shrink its live +/// range to just the remaining uses. This method does not compute reaching +/// defs for new uses, and it doesn't remove dead defs. +void LiveIntervals::shrinkToUses(LiveInterval *li) { + DEBUG(dbgs() << "Shrink: " << *li << '\n'); + assert(TargetRegisterInfo::isVirtualRegister(li->reg) + && "Can't only shrink physical registers"); + // Find all the values used, including PHI kills. + SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; + + // Visit all instructions reading li->reg. + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg); + MachineInstr *UseMI = I.skipInstruction();) { + if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) + continue; + SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); + VNInfo *VNI = li->getVNInfoAt(Idx); + assert(VNI && "Live interval not live into reading instruction"); + if (VNI->def == Idx) { + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. + Idx = Idx.getPrevSlot(); + VNI = li->getVNInfoAt(Idx); + assert(VNI && "Early-clobber tied value not available"); + } + WorkList.push_back(std::make_pair(Idx, VNI)); + } + + // Create a new live interval with only minimal live segments per def. + LiveInterval NewLI(li->reg, 0); + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + } + + // Extend intervals to reach all uses in WorkList. + while (!WorkList.empty()) { + SlotIndex Idx = WorkList.back().first; + VNInfo *VNI = WorkList.back().second; + WorkList.pop_back(); + + // Extend the live range for VNI to be live at Idx. + LiveInterval::iterator I = NewLI.find(Idx); + + // Already got it? + if (I != NewLI.end() && I->start <= Idx) { + assert(I->valno == VNI && "Unexpected existing value number"); + continue; + } + + // Is there already a live range in the block containing Idx? 
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + SlotIndex BlockStart = getMBBStartIdx(MBB); + DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx + << " in BB#" << MBB->getNumber() << '@' << BlockStart); + if (I != NewLI.begin() && (--I)->end > BlockStart) { + assert(I->valno == VNI && "Wrong reaching def"); + DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n"); + // Is this the first use of a PHIDef in its defining block? + if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) { + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + VNInfo *PVNI = li->getVNInfoAt(Stop); + // A predecessor is not required to have a live-out value for a PHI. + if (PVNI) { + assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag"); + WorkList.push_back(std::make_pair(Stop, PVNI)); + } + } + } + + // Extend the live range in the block to include Idx. + NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI)); + continue; + } + + // VNI is live-in to MBB. + DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + + // Make sure VNI is live-out from the predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + WorkList.push_back(std::make_pair(Stop, VNI)); + } + } + + // Handle dead values. + for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); + assert(LII != NewLI.end() && "Missing live range for PHI"); + if (LII->end != VNI->def.getNextSlot()) + continue; + if (!VNI->isPHIDef()) { + // This is a dead PHI. Remove it. + VNI->setIsUnused(true); + NewLI.removeRange(*LII); + } else { + // This is a dead def. Make sure the instruction knows. + MachineInstr *MI = getInstructionFromIndex(VNI->def); + assert(MI && "No instruction defining live value"); + MI->addRegisterDead(li->reg, tri_); + } + } + + // Move the trimmed ranges back. + li->ranges.swap(NewLI.ranges); + DEBUG(dbgs() << "Shrink: " << *li << '\n'); +} + + //===----------------------------------------------------------------------===// // Register allocator hooks. // +MachineBasicBlock::iterator +LiveIntervals::getLastSplitPoint(const LiveInterval &li, + MachineBasicBlock *mbb) const { + const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor(); + + // If li is not live into a landing pad, we can insert spill code before the + // first terminator. + if (!lpad || !isLiveInToMBB(li, lpad)) + return mbb->getFirstTerminator(); + + // When there is a landing pad, spill code must go before the call instruction + // that can throw. + MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin(); + while (I != B) { + --I; + if (I->getDesc().isCall()) + return I; + } + // The block contains no calls that can throw, so use the first terminator. 
+ return mbb->getFirstTerminator(); +} + +void LiveIntervals::addKillFlags() { + for (iterator I = begin(), E = end(); I != E; ++I) { + unsigned Reg = I->first; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (mri_->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = I->second; + + // Every instruction that kills Reg corresponds to a live range end point. + for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; + ++RI) { + // A LOAD index indicates an MBB edge. + if (RI->end.isLoad()) + continue; + MachineInstr *MI = getInstructionFromIndex(RI->end); + if (!MI) + continue; + MI->addRegisterKilled(Reg, NULL); + } + } +} + /// getReMatImplicitUse - If the remat definition MI has one (for now, we only /// allow one) virtual register operand, then its uses are implicitly using /// the register. Returns the virtual register. @@ -800,18 +946,17 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// which reaches the given instruction also reaches the specified use index. bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, SlotIndex UseIdx) const { - SlotIndex Index = getInstructionIndex(MI); - VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; - LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); - return UI != li.end() && UI->valno == ValNo; + VNInfo *UValNo = li.getVNInfoAt(UseIdx); + return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI)); } /// isReMaterializable - Returns true if the definition MI of the specified /// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI, - SmallVectorImpl<LiveInterval*> &SpillIs, - bool &isLoad) { +bool +LiveIntervals::isReMaterializable(const LiveInterval &li, + const VNInfo *ValNo, MachineInstr *MI, + const SmallVectorImpl<LiveInterval*> &SpillIs, + bool &isLoad) { if (DisableReMat) return false; @@ -829,7 +974,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, ri != re; ++ri) { MachineInstr *UseMI = &*ri; SlotIndex UseIdx = getInstructionIndex(UseMI); - if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) + if (li.getVNInfoAt(UseIdx) != ValNo) continue; if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) return false; @@ -855,9 +1000,10 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - SmallVectorImpl<LiveInterval*> &SpillIs, - bool &isLoad) { +bool +LiveIntervals::isReMaterializable(const LiveInterval &li, + const SmallVectorImpl<LiveInterval*> &SpillIs, + bool &isLoad) { isLoad = false; for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); i != e; ++i) { @@ -865,9 +1011,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? 
- if (!VNI->isDefAccurate()) - return false; MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); + if (!ReMatDefMI) + return false; bool DefIsLoad = false; if (!ReMatDefMI || !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad)) @@ -1010,7 +1156,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!vrm.isReMaterialized(Reg)) continue; @@ -1044,7 +1190,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (!mop.isReg()) continue; unsigned Reg = mop.getReg(); - if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (Reg != li.reg) continue; @@ -1140,11 +1286,14 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, rewriteImplicitOps(li, MI, NewVReg, vrm); // Reuse NewVReg for other reads. + bool HasEarlyClobber = false; for (unsigned j = 0, e = Ops.size(); j != e; ++j) { MachineOperand &mopj = MI->getOperand(Ops[j]); mopj.setReg(NewVReg); if (mopj.isImplicit()) rewriteImplicitOps(li, MI, NewVReg, vrm); + if (mopj.isEarlyClobber()) + HasEarlyClobber = true; } if (CreatedNewVReg) { @@ -1190,7 +1339,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (HasUse) { if (CreatedNewVReg) { LiveRange LR(index.getLoadIndex(), index.getDefIndex(), - nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); + nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } else { @@ -1203,8 +1352,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, } } if (HasDef) { - LiveRange LR(index.getDefIndex(), index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator)); + // An early clobber starts at the use slot, except for an early clobber + // tied to a use operand (yes, that is a thing). + LiveRange LR(HasEarlyClobber && !HasUse ? + index.getUseIndex() : index.getDefIndex(), + index.getStoreIndex(), + nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); DEBUG(dbgs() << " +" << LR); nI.addRange(LR); } @@ -1554,15 +1707,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { return (isDef + isUse) * lc; } -void -LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { +static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - normalizeSpillWeight(*NewLIs[i]); + NewLIs[i]->weight = + normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); } std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, - SmallVectorImpl<LiveInterval*> &SpillIs, + const SmallVectorImpl<LiveInterval*> &SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { assert(li.isSpillable() && "attempt to spill already spilled interval!"); @@ -1653,8 +1806,7 @@ addIntervalsForSpills(const LiveInterval &li, if (VNI->isUnused()) continue; // Dead val#. // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = VNI->isDefAccurate() - ? getInstructionFromIndex(VNI->def) : 0; + MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); bool dummy; if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { // Remember how to remat the def of this val#. 
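The hunk above shows the pattern this patch applies throughout LiveIntervalAnalysis.cpp: instead of trusting the old VNInfo "def accurate" flag, the code asks the slot index mapping for the defining instruction and treats a null result as "no real defining instruction" (a PHI def, for instance). A minimal self-contained sketch of the idiom, with a std::map standing in for the index-to-instruction table (names here are illustrative, not LLVM API):

  #include <cstdio>
  #include <map>
  #include <string>

  // Stand-in for the slot-index-to-instruction mapping (illustrative only).
  struct Instr { std::string text; };

  static const Instr *getInstructionFromIndex(const std::map<unsigned, Instr> &idx,
                                              unsigned def) {
    std::map<unsigned, Instr>::const_iterator it = idx.find(def);
    return it == idx.end() ? 0 : &it->second;   // null for PHI defs etc.
  }

  int main() {
    std::map<unsigned, Instr> index;
    index[12].text = "%vreg1 = ADD ...";

    // Old style: a flag on the value number said whether 'def' was trustworthy.
    // New style: just ask the index; a missing entry means "no defining instruction".
    const unsigned defs[] = {12, 4};
    for (unsigned i = 0; i != 2; ++i) {
      if (const Instr *MI = getInstructionFromIndex(index, defs[i]))
        std::printf("def at %u: %s\n", defs[i], MI->text.c_str());
      else
        std::printf("def at %u: no instruction (e.g. PHI def)\n", defs[i]);
    }
  }
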
@@ -1926,6 +2078,9 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, unsigned PhysReg, VirtRegMap &vrm) { unsigned SpillReg = getRepresentativeReg(PhysReg); + DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) + << " represented by " << tri_->getName(SpillReg) << '\n'); + for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) // If there are registers which alias PhysReg, but which are not a // sub-register of the chosen representative super register. Assert @@ -1937,15 +2092,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, SmallVector<unsigned, 4> PRegs; if (hasInterval(SpillReg)) PRegs.push_back(SpillReg); - else { - SmallSet<unsigned, 4> Added; - for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) - if (Added.insert(*AS) && hasInterval(*AS)) { - PRegs.push_back(*AS); - for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS) - Added.insert(*ASS); - } - } + for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) + if (hasInterval(*SR)) + PRegs.push_back(*SR); + + DEBUG({ + dbgs() << "Trying to spill:"; + for (unsigned i = 0, e = PRegs.size(); i != e; ++i) + dbgs() << ' ' << tri_->getName(PRegs[i]); + dbgs() << '\n'; + }); SmallPtrSet<MachineInstr*, 8> SeenMIs; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), @@ -1956,18 +2112,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, continue; SeenMIs.insert(MI); SlotIndex Index = getInstructionIndex(MI); + bool LiveReg = false; for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { unsigned PReg = PRegs[i]; LiveInterval &pli = getInterval(PReg); if (!pli.liveAt(Index)) continue; - vrm.addEmergencySpill(PReg, MI); + LiveReg = true; SlotIndex StartIdx = Index.getLoadIndex(); SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (pli.isInOneLiveRange(StartIdx, EndIdx)) { - pli.removeRange(StartIdx, EndIdx); - Cut = true; - } else { + if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { std::string msg; raw_string_ostream Msg(msg); Msg << "Ran out of registers during register allocation!"; @@ -1978,15 +2132,14 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, } report_fatal_error(Msg.str()); } - for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) { - if (!hasInterval(*AS)) - continue; - LiveInterval &spli = getInterval(*AS); - if (spli.liveAt(Index)) - spli.removeRange(Index.getLoadIndex(), - Index.getNextIndex().getBaseIndex()); - } + pli.removeRange(StartIdx, EndIdx); + LiveReg = true; } + if (!LiveReg) + continue; + DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); + vrm.addEmergencySpill(SpillReg, MI); + Cut = true; } return Cut; } @@ -1996,7 +2149,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getDefIndex()), - startInst, true, getVNInfoAllocator()); + startInst, getVNInfoAllocator()); VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getDefIndex()), diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp new file mode 100644 index 000000000000..205f28a0d65a --- /dev/null +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -0,0 +1,315 @@ +//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University 
of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion represents a coalesced set of live intervals. This may be +// used during coalescing to represent a congruence class, or during register +// allocation to model liveness of a physical register. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveIntervalUnion.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + + +// Merge a LiveInterval's segments. Guarantee no overlaps. +void LiveIntervalUnion::unify(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Insert each of the virtual register's live segments into the map. + LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); + if (++RegPos == RegEnd) + return; + SegPos.advanceTo(RegPos->start); + } +} + +// Remove a live virtual register's segments from this union. +void LiveIntervalUnion::extract(LiveInterval &VirtReg) { + if (VirtReg.empty()) + return; + ++Tag; + + // Remove each of the virtual register's live segments from the map. + LiveInterval::iterator RegPos = VirtReg.begin(); + LiveInterval::iterator RegEnd = VirtReg.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval"); + SegPos.erase(); + if (!SegPos.valid()) + return; + + // Skip all segments that may have been coalesced. + RegPos = VirtReg.advanceTo(RegPos, SegPos.start()); + if (RegPos == RegEnd) + return; + + SegPos.advanceTo(RegPos->start); + } +} + +void +LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + OS << "LIU " << PrintReg(RepReg, TRI); + if (empty()) { + OS << " empty\n"; + return; + } + for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { + OS << " [" << SI.start() << ' ' << SI.stop() << "):" + << PrintReg(SI.value()->reg, TRI); + } + OS << '\n'; +} + +void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { + OS << '[' << start() << ';' << stop() << "):" + << PrintReg(interference()->reg, TRI); +} + +void LiveIntervalUnion::Query::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) { + OS << "Interferences with "; + LiveUnion->print(OS, TRI); + InterferenceResult IR = firstInterference(); + while (isInterference(IR)) { + OS << " "; + IR.print(OS, TRI); + OS << '\n'; + nextInterference(IR); + } +} + +#ifndef NDEBUG +// Verify the live intervals in this union and add them to the visited set. +void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { + for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) + VisitedVRegs.set(SI.value()->reg); +} +#endif //!NDEBUG + +// Private interface accessed by Query. +// +// Find a pair of segments that intersect, one in the live virtual register +// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query) +// is responsible for advancing the LiveIntervalUnion segments to find a +// "notable" intersection, which requires query-specific logic. 
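The comment above is describing the usual walk over two sorted sequences of disjoint segments: advance whichever side ends first until a pair overlaps. A compact stand-alone sketch of that idea, with plain vectors standing in for the LiveInterval and the union's IntervalMap (all names are illustrative, not the patch's code):

  #include <cstdio>
  #include <utility>
  #include <vector>

  typedef std::pair<unsigned, unsigned> Seg;          // half-open [start, end)

  // Return indices of the first overlapping pair, or (A.size(), B.size()) if none.
  static std::pair<size_t, size_t> findIntersection(const std::vector<Seg> &A,
                                                    const std::vector<Seg> &B) {
    size_t i = 0, j = 0;
    while (i < A.size() && j < B.size()) {
      if (A[i].second <= B[j].first)
        ++i;                                          // A[i] ends before B[j] starts
      else if (B[j].second <= A[i].first)
        ++j;                                          // B[j] ends before A[i] starts
      else
        return std::make_pair(i, j);                  // the segments overlap
    }
    return std::make_pair(A.size(), B.size());
  }

  int main() {
    std::vector<Seg> vreg;
    vreg.push_back(Seg(0, 4));  vreg.push_back(Seg(10, 14));
    std::vector<Seg> live;
    live.push_back(Seg(4, 8));  live.push_back(Seg(12, 20));
    std::pair<size_t, size_t> p = findIntersection(vreg, live);
    if (p.first < vreg.size())
      std::printf("overlap: vreg[%zu] with union[%zu]\n", p.first, p.second);
  }

The patch's version performs the same walk with iterators that support advanceTo(), so each side can skip ahead rather than stepping one segment at a time.
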
+// +// This design assumes only a fast mechanism for intersecting a single live +// virtual register segment with a set of LiveIntervalUnion segments. This may +// be ok since most virtual registers have very few segments. If we had a data +// structure that optimizd MxN intersection of segments, then we would bypass +// the loop that advances within the LiveInterval. +// +// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first +// segment whose start point is greater than LiveInterval's end point. +// +// Assumes that segments are sorted by start position in both +// LiveInterval and LiveSegments. +void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const { + // Search until reaching the end of the LiveUnion segments. + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + if (IR.VirtRegI == VirtRegEnd) + return; + while (IR.LiveUnionI.valid()) { + // Slowly advance the live virtual reg iterator until we surpass the next + // segment in LiveUnion. + // + // Note: If this is ever used for coalescing of fixed registers and we have + // a live vreg with thousands of segments, then change this code to use + // upperBound instead. + IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); + if (IR.VirtRegI == VirtRegEnd) + break; // Retain current (nonoverlapping) LiveUnionI + + // VirtRegI may have advanced far beyond LiveUnionI, catch up. + IR.LiveUnionI.advanceTo(IR.VirtRegI->start); + + // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end + if (!IR.LiveUnionI.valid()) + break; + if (IR.LiveUnionI.start() < IR.VirtRegI->end) { + assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && + "upperBound postcondition"); + break; + } + } + if (!IR.LiveUnionI.valid()) + IR.VirtRegI = VirtRegEnd; +} + +// Find the first intersection, and cache interference info +// (retain segment iterators into both VirtReg and LiveUnion). +const LiveIntervalUnion::InterferenceResult & +LiveIntervalUnion::Query::firstInterference() { + if (CheckedFirstInterference) + return FirstInterference; + CheckedFirstInterference = true; + InterferenceResult &IR = FirstInterference; + + // Quickly skip interference check for empty sets. + if (VirtReg->empty() || LiveUnion->empty()) { + IR.VirtRegI = VirtReg->end(); + } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) { + // VirtReg starts first, perform double binary search. + IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); + if (IR.VirtRegI != VirtReg->end()) + IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + } else { + // LiveUnion starts first, perform double binary search. + IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + if (IR.LiveUnionI.valid()) + IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); + else + IR.VirtRegI = VirtReg->end(); + } + findIntersection(FirstInterference); + assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid()) + && "Uninitialized iterator"); + return FirstInterference; +} + +// Treat the result as an iterator and advance to the next interfering pair +// of segments. This is a plain iterator with no filter. +bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const { + assert(isInterference(IR) && "iteration past end of interferences"); + + // Advance either the VirtReg or LiveUnion segment to ensure that we visit all + // unique overlapping pairs. 
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) { + if (++IR.VirtRegI == VirtReg->end()) + return false; + } + else { + if (!(++IR.LiveUnionI).valid()) { + IR.VirtRegI = VirtReg->end(); + return false; + } + } + // Short-circuit findIntersection() if possible. + if (overlap(*IR.VirtRegI, IR.LiveUnionI)) + return true; + + // Find the next intersection. + findIntersection(IR); + return isInterference(IR); +} + +// Scan the vector of interfering virtual registers in this union. Assume it's +// quite small. +bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { + SmallVectorImpl<LiveInterval*>::const_iterator I = + std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); + return I != InterferingVRegs.end(); +} + +// Count the number of virtual registers in this union that interfere with this +// query's live virtual register. +// +// The number of times that we either advance IR.VirtRegI or call +// LiveUnion.upperBound() will be no more than the number of holes in +// VirtReg. So each invocation of collectInterferingVRegs() takes +// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()). +// +// For comments on how to speed it up, see Query::findIntersection(). +unsigned LiveIntervalUnion::Query:: +collectInterferingVRegs(unsigned MaxInterferingRegs) { + InterferenceResult IR = firstInterference(); + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + LiveInterval *RecentInterferingVReg = NULL; + if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) { + // Advance the union's iterator to reach an unseen interfering vreg. + do { + if (IR.LiveUnionI.value() == RecentInterferingVReg) + continue; + + if (!isSeenInterference(IR.LiveUnionI.value())) + break; + + // Cache the most recent interfering vreg to bypass isSeenInterference. + RecentInterferingVReg = IR.LiveUnionI.value(); + + } while ((++IR.LiveUnionI).valid()); + if (!IR.LiveUnionI.valid()) + break; + + // Advance the VirtReg iterator until surpassing the next segment in + // LiveUnion. + IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start()); + if (IR.VirtRegI == VirtRegEnd) + break; + + // Check for intersection with the union's segment. + if (overlap(*IR.VirtRegI, IR.LiveUnionI)) { + + if (!IR.LiveUnionI.value()->isSpillable()) + SeenUnspillableVReg = true; + + if (InterferingVRegs.size() == MaxInterferingRegs) + // Leave SeenAllInterferences set to false to indicate that at least one + // interference exists beyond those we collected. + return MaxInterferingRegs; + + InterferingVRegs.push_back(IR.LiveUnionI.value()); + + // Cache the most recent interfering vreg to bypass isSeenInterference. + RecentInterferingVReg = IR.LiveUnionI.value(); + ++IR.LiveUnionI; + continue; + } + // VirtRegI may have advanced far beyond LiveUnionI, + // do a fast intersection test to "catch up" + IR.LiveUnionI.advanceTo(IR.VirtRegI->start); + } + SeenAllInterferences = true; + return InterferingVRegs.size(); +} + +bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) { + // VirtReg is likely live throughout the loop, so start by checking LIU-Loop + // overlaps. + IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map> + Overlaps(LiveUnion->getMap(), Loop->getMap()); + if (!Overlaps.valid()) + return false; + + // The loop is overlapping an LIU assignment. Check VirtReg as well. 
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start()); + + for (;;) { + if (VRI == VirtReg->end()) + return false; + if (VRI->start < Overlaps.stop()) + return true; + + Overlaps.advanceTo(VRI->start); + if (!Overlaps.valid()) + return false; + if (Overlaps.start() < VRI->end) + return true; + + VRI = VirtReg->advanceTo(VRI, Overlaps.start()); + } +} diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h new file mode 100644 index 000000000000..6f9c5f4455e9 --- /dev/null +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -0,0 +1,258 @@ +//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion is a union of live segments across multiple live virtual +// registers. This may be used during coalescing to represent a congruence +// class, or during register allocation to model liveness of a physical +// register. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVEINTERVALUNION +#define LLVM_CODEGEN_LIVEINTERVALUNION + +#include "llvm/ADT/IntervalMap.h" +#include "llvm/CodeGen/LiveInterval.h" + +#include <algorithm> + +namespace llvm { + +class MachineLoopRange; +class TargetRegisterInfo; + +#ifndef NDEBUG +// forward declaration +template <unsigned Element> class SparseBitVector; +typedef SparseBitVector<128> LiveVirtRegBitSet; +#endif + +/// Compare a live virtual register segment to a LiveIntervalUnion segment. +inline bool +overlap(const LiveRange &VRSeg, + const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) { + return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end; +} + +/// Union of live intervals that are strong candidates for coalescing into a +/// single register (either physical or virtual depending on the context). We +/// expect the constituent live intervals to be disjoint, although we may +/// eventually make exceptions to handle value-based interference. +class LiveIntervalUnion { + // A set of live virtual register segments that supports fast insertion, + // intersection, and removal. + // Mapping SlotIndex intervals to virtual register numbers. + typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments; + +public: + // SegmentIter can advance to the next segment ordered by starting position + // which may belong to a different live virtual register. We also must be able + // to reach the current segment's containing virtual register. + typedef LiveSegments::iterator SegmentIter; + + // LiveIntervalUnions share an external allocator. + typedef LiveSegments::Allocator Allocator; + + class InterferenceResult; + class Query; + +private: + const unsigned RepReg; // representative register number + unsigned Tag; // unique tag for current contents. + LiveSegments Segments; // union of virtual reg segments + +public: + LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a) + {} + + // Iterate over all segments in the union of live virtual registers ordered + // by their starting position. 
+ SegmentIter begin() { return Segments.begin(); } + SegmentIter end() { return Segments.end(); } + SegmentIter find(SlotIndex x) { return Segments.find(x); } + bool empty() const { return Segments.empty(); } + SlotIndex startIndex() const { return Segments.start(); } + + // Provide public access to the underlying map to allow overlap iteration. + typedef LiveSegments Map; + const Map &getMap() { return Segments; } + + /// getTag - Return an opaque tag representing the current state of the union. + unsigned getTag() const { return Tag; } + + /// changedSince - Return true if the union change since getTag returned tag. + bool changedSince(unsigned tag) const { return tag != Tag; } + + // Add a live virtual register to this union and merge its segments. + void unify(LiveInterval &VirtReg); + + // Remove a live virtual register's segments from this union. + void extract(LiveInterval &VirtReg); + + // Print union, using TRI to translate register names + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; + +#ifndef NDEBUG + // Verify the live intervals in this union and add them to the visited set. + void verify(LiveVirtRegBitSet& VisitedVRegs); +#endif + + /// Cache a single interference test result in the form of two intersecting + /// segments. This allows efficiently iterating over the interferences. The + /// iteration logic is handled by LiveIntervalUnion::Query which may + /// filter interferences depending on the type of query. + class InterferenceResult { + friend class Query; + + LiveInterval::iterator VirtRegI; // current position in VirtReg + SegmentIter LiveUnionI; // current position in LiveUnion + + // Internal ctor. + InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI) + : VirtRegI(VRegI), LiveUnionI(UnionI) {} + + public: + // Public default ctor. + InterferenceResult(): VirtRegI(), LiveUnionI() {} + + /// start - Return the start of the current overlap. + SlotIndex start() const { + return std::max(VirtRegI->start, LiveUnionI.start()); + } + + /// stop - Return the end of the current overlap. + SlotIndex stop() const { + return std::min(VirtRegI->end, LiveUnionI.stop()); + } + + /// interference - Return the register that is interfering here. + LiveInterval *interference() const { return LiveUnionI.value(); } + + // Note: this interface provides raw access to the iterators because the + // result has no way to tell if it's valid to dereference them. + + // Access the VirtReg segment. + LiveInterval::iterator virtRegPos() const { return VirtRegI; } + + // Access the LiveUnion segment. + const SegmentIter &liveUnionPos() const { return LiveUnionI; } + + bool operator==(const InterferenceResult &IR) const { + return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI; + } + bool operator!=(const InterferenceResult &IR) const { + return !operator==(IR); + } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; + }; + + /// Query interferences between a single live virtual register and a live + /// interval union. 
+ class Query { + LiveIntervalUnion *LiveUnion; + LiveInterval *VirtReg; + InterferenceResult FirstInterference; + SmallVector<LiveInterval*,4> InterferingVRegs; + bool CheckedFirstInterference; + bool SeenAllInterferences; + bool SeenUnspillableVReg; + unsigned Tag; + + public: + Query(): LiveUnion(), VirtReg() {} + + Query(LiveInterval *VReg, LiveIntervalUnion *LIU): + LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), + SeenAllInterferences(false), SeenUnspillableVReg(false) + {} + + void clear() { + LiveUnion = NULL; + VirtReg = NULL; + InterferingVRegs.clear(); + CheckedFirstInterference = false; + SeenAllInterferences = false; + SeenUnspillableVReg = false; + Tag = 0; + } + + void init(LiveInterval *VReg, LiveIntervalUnion *LIU) { + assert(VReg && LIU && "Invalid arguments"); + if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) { + // Retain cached results, e.g. firstInterference. + return; + } + clear(); + LiveUnion = LIU; + VirtReg = VReg; + Tag = LIU->getTag(); + } + + LiveInterval &virtReg() const { + assert(VirtReg && "uninitialized"); + return *VirtReg; + } + + bool isInterference(const InterferenceResult &IR) const { + if (IR.VirtRegI != VirtReg->end()) { + assert(overlap(*IR.VirtRegI, IR.LiveUnionI) && + "invalid segment iterators"); + return true; + } + return false; + } + + // Does this live virtual register interfere with the union? + bool checkInterference() { return isInterference(firstInterference()); } + + // Get the first pair of interfering segments, or a noninterfering result. + // This initializes the firstInterference_ cache. + const InterferenceResult &firstInterference(); + + // Treat the result as an iterator and advance to the next interfering pair + // of segments. Visiting each unique interfering pairs means that the same + // VirtReg or LiveUnion segment may be visited multiple times. + bool nextInterference(InterferenceResult &IR) const; + + // Count the virtual registers in this union that interfere with this + // query's live virtual register, up to maxInterferingRegs. + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + + // Was this virtual register visited during collectInterferingVRegs? + bool isSeenInterference(LiveInterval *VReg) const; + + // Did collectInterferingVRegs collect all interferences? + bool seenAllInterferences() const { return SeenAllInterferences; } + + // Did collectInterferingVRegs encounter an unspillable vreg? + bool seenUnspillableVReg() const { return SeenUnspillableVReg; } + + // Vector generated by collectInterferingVRegs. + const SmallVectorImpl<LiveInterval*> &interferingVRegs() const { + return InterferingVRegs; + } + + /// checkLoopInterference - Return true if there is interference overlapping + /// Loop. 
+ bool checkLoopInterference(MachineLoopRange*); + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI); + private: + Query(const Query&); // DO NOT IMPLEMENT + void operator=(const Query&); // DO NOT IMPLEMENT + + // Private interface for queries + void findIntersection(InterferenceResult &IR) const; + }; +}; + +} // end namespace llvm + +#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION) diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp new file mode 100644 index 000000000000..3bbda1c2e609 --- /dev/null +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -0,0 +1,129 @@ +//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeEdit class represents changes done to a virtual register when it +// is spilled or split. +//===----------------------------------------------------------------------===// + +#include "LiveRangeEdit.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri, + LiveIntervals &lis, + VirtRegMap &vrm) { + const TargetRegisterClass *RC = mri.getRegClass(getReg()); + unsigned VReg = mri.createVirtualRegister(RC); + vrm.grow(); + vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg())); + LiveInterval &li = lis.getOrCreateInterval(VReg); + newRegs_.push_back(&li); + return li; +} + +void LiveRangeEdit::scanRemattable(LiveIntervals &lis, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + for (LiveInterval::vni_iterator I = parent_.vni_begin(), + E = parent_.vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + if (!DefMI) + continue; + if (tii.isTriviallyReMaterializable(DefMI, aa)) + remattable_.insert(VNI); + } + scannedRemattable_ = true; +} + +bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + if (!scannedRemattable_) + scanRemattable(lis, tii, aa); + return !remattable_.empty(); +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx, + LiveIntervals &lis) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg()) + continue; + // Reserved registers are OK. + if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in uselessRegs_. 
+ for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui) + if (uselessRegs_[ui]->reg == MO.getReg()) + return false; + + LiveInterval &li = lis.getInterval(MO.getReg()); + const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + if (OVNI != li.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +bool LiveRangeEdit::canRematerializeAt(Remat &RM, + SlotIndex UseIdx, + bool cheapAsAMove, + LiveIntervals &lis) { + assert(scannedRemattable_ && "Call anyRematerializable first"); + + // Use scanRemattable info. + if (!remattable_.count(RM.ParentVNI)) + return false; + + // No defining instruction. + RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def); + assert(RM.OrigMI && "Defining instruction for remattable value disappeared"); + + // If only cheap remats were requested, bail out early. + if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) + return false; + + // Verify that all used registers are available with the same values. + if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis)) + return false; + + return true; +} + +SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, + const Remat &RM, + LiveIntervals &lis, + const TargetInstrInfo &tii, + const TargetRegisterInfo &tri) { + assert(RM.OrigMI && "Invalid remat"); + tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + rematted_.insert(RM.ParentVNI); + return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); +} + diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h new file mode 100644 index 000000000000..73f69ed63983 --- /dev/null +++ b/lib/CodeGen/LiveRangeEdit.h @@ -0,0 +1,135 @@ +//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeEdit class represents changes done to a virtual register when it +// is spilled or split. +// +// The parent register is never changed. Instead, a number of new virtual +// registers are created and added to the newRegs vector. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H +#define LLVM_CODEGEN_LIVERANGEEDIT_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class AliasAnalysis; +class LiveIntervals; +class MachineRegisterInfo; +class VirtRegMap; + +class LiveRangeEdit { + LiveInterval &parent_; + SmallVectorImpl<LiveInterval*> &newRegs_; + const SmallVectorImpl<LiveInterval*> &uselessRegs_; + + /// firstNew_ - Index of the first register added to newRegs_. + const unsigned firstNew_; + + /// scannedRemattable_ - true when remattable values have been identified. + bool scannedRemattable_; + + /// remattable_ - Values defined by remattable instructions as identified by + /// tii.isTriviallyReMaterializable(). + SmallPtrSet<VNInfo*,4> remattable_; + + /// rematted_ - Values that were actually rematted, and so need to have their + /// live range trimmed or entirely removed. + SmallPtrSet<VNInfo*,4> rematted_; + + /// scanRemattable - Identify the parent_ values that may rematerialize. 
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially, any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ const SmallVectorImpl<LiveInterval*> &uselessRegs)
+ : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ /// create - Create a new register with the same class and stack slot as
+ /// parent.
+ LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&);
+
+ /// markRematerialized - Explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(VNInfo *ParentVNI) const { + return rematted_.count(ParentVNI); + } +}; + +} + +#endif diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index b5c385f77239..c75196a47210 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -26,7 +26,9 @@ using namespace llvm; char LiveStacks::ID = 0; INITIALIZE_PASS(LiveStacks, "livestacks", - "Live Stack Slot Analysis", false, false); + "Live Stack Slot Analysis", false, false) + +char &llvm::LiveStacksID = LiveStacks::ID; void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -48,6 +50,22 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) { return false; } +LiveInterval & +LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::iterator I = S2IMap.find(Slot); + if (I == S2IMap.end()) { + I = S2IMap.insert(I, std::make_pair(Slot, + LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F))); + S2RCMap.insert(std::make_pair(Slot, RC)); + } else { + // Use the largest common subclass register class. + const TargetRegisterClass *OldRC = S2RCMap[Slot]; + S2RCMap[Slot] = getCommonSubClass(OldRC, RC); + } + return I->second; +} + /// print - Implement the dump method. void LiveStacks::print(raw_ostream &OS, const Module*) const { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 375307b973a9..dd43ef2530c1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -42,8 +41,11 @@ using namespace llvm; char LiveVariables::ID = 0; -INITIALIZE_PASS(LiveVariables, "livevars", - "Live Variable Analysis", false, false); +INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", + "Live Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) +INITIALIZE_PASS_END(LiveVariables, "livevars", + "Live Variable Analysis", false, false) void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { @@ -79,13 +81,7 @@ void LiveVariables::VarInfo::dump() const { LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && "getVarInfo: not a virtual register!"); - RegIdx -= TargetRegisterInfo::FirstVirtualRegister; - if (RegIdx >= VirtRegInfo.size()) { - if (RegIdx >= 2*VirtRegInfo.size()) - VirtRegInfo.resize(RegIdx*2); - else - VirtRegInfo.resize(2*VirtRegInfo.size()); - } + VirtRegInfo.grow(RegIdx); return VirtRegInfo[RegIdx]; } @@ -498,9 +494,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); PHIJoins.clear(); - /// Get some space for a respectable number of registers. - VirtRegInfo.resize(64); - analyzePHINodes(mf); // Calculate live variable information in depth first order on the CFG of the @@ -628,19 +621,14 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Convert and transfer the dead / killed information we have gathered into // VirtRegInfo onto MI's. 
- for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) - for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j) - if (VirtRegInfo[i].Kills[j] == - MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister)) - VirtRegInfo[i] - .Kills[j]->addRegisterDead(i + - TargetRegisterInfo::FirstVirtualRegister, - TRI); + for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { + const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) + if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) + VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); else - VirtRegInfo[i] - .Kills[j]->addRegisterKilled(i + - TargetRegisterInfo::FirstVirtualRegister, - TRI); + VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); + } // Check to make sure there are no unreachable blocks in the MC CFG for the // function. If so, it is due to a bug in the instruction selector or some @@ -775,8 +763,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); // Update info for all live variables - for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, - E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); VarInfo &VI = getVarInfo(Reg); if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) VI.AliveBlocks.set(NumNew); diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 7e366f0ceec0..1318d6212497 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -9,7 +9,7 @@ // // This pass assigns local frame indices to stack slots relative to one another // and allocates additional base registers to access them when the target -// estimates the are likely to be out of range of stack pointer and frame +// estimates they are likely to be out of range of stack pointer and frame // pointer relative addressing. // //===----------------------------------------------------------------------===// @@ -34,7 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; @@ -152,9 +152,9 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... 
MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; unsigned MaxAlign = 0; @@ -227,27 +227,28 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { MachineFrameInfo *MFI = Fn.getFrameInfo(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; - MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin(); + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Collect all of the instructions in the block that reference // a frame index. Also store the frame index referenced to ease later // lookup. (For any insn that has more than one FI reference, we arbitrarily // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - // A base register definition is a register+offset pair. - SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; + // A base register definition is a register + offset pair. + SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { MachineInstr *MI = I; + // Debug value instructions can't be out of range, so they don't need // any updates. if (MI->isDebugValue()) continue; + // For now, allocate the base register(s) within the basic block // where they're used, and don't try to keep them around outside // of that. It may be beneficial to try sharing them more broadly @@ -268,11 +269,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { } } } + // Sort the frame references by local offset array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + MachineBasicBlock *Entry = Fn.begin(); - // Loop throught the frame references and allocate for them as necessary + // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { MachineBasicBlock::iterator I = FrameReferenceInsns[ref].getMachineInstr(); @@ -321,10 +324,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { DEBUG(dbgs() << " Materializing base register " << BaseReg << " at frame local offset " << LocalOffsets[FrameIdx] + InstrOffset << "\n"); + // Tell the target to insert the instruction to initialize // the base register. 
- TRI->materializeFrameBaseRegister(InsertionPt, BaseReg, - FrameIdx, InstrOffset); + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); // The base register already includes any offset specified // by the instruction, so account for that so it doesn't get diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 50f3f672dced..ccbff0af5b2c 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -146,27 +147,46 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { return I; } +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + ++I; + return I; +} + MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator I = end(); - while (I != begin() && (--I)->getDesc().isTerminator()) + while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) ; /*noop */ - if (I != end() && !I->getDesc().isTerminator()) ++I; + while (I != end() && !I->getDesc().isTerminator()) + ++I; return I; } -void MachineBasicBlock::dump() const { - print(dbgs()); +MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { + iterator B = begin(), I = end(); + while (I != B) { + --I; + if (I->isDebugValue()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { + // A block with a landing pad successor only has one other successor. 
+ if (succ_size() > 2) + return 0; + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isLandingPad()) + return *I; + return 0; } -static inline void OutputReg(raw_ostream &os, unsigned RegNo, - const TargetRegisterInfo *TRI = 0) { - if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) { - if (TRI) - os << " %" << TRI->get(RegNo).Name; - else - os << " %physreg" << RegNo; - } else - os << " %reg" << RegNo; +void MachineBasicBlock::dump() const { + print(dbgs()); } StringRef MachineBasicBlock::getName() const { @@ -176,7 +196,7 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } -void MachineBasicBlock::print(raw_ostream &OS) const { +void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -186,6 +206,9 @@ void MachineBasicBlock::print(raw_ostream &OS) const { if (Alignment) { OS << "Alignment " << Alignment << "\n"; } + if (Indexes) + OS << Indexes->getMBBStartIdx(this) << '\t'; + OS << "BB#" << getNumber() << ": "; const char *Comma = ""; @@ -198,28 +221,36 @@ void MachineBasicBlock::print(raw_ostream &OS) const { if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } OS << '\n'; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); if (!livein_empty()) { + if (Indexes) OS << '\t'; OS << " Live Ins:"; for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) - OutputReg(OS, *I, TRI); + OS << ' ' << PrintReg(*I, TRI); OS << '\n'; } // Print the preds of this block according to the CFG. if (!pred_empty()) { + if (Indexes) OS << '\t'; OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) OS << " BB#" << (*PI)->getNumber(); OS << '\n'; } - + for (const_iterator I = begin(); I != end(); ++I) { + if (Indexes) { + if (Indexes->hasIndex(I)) + OS << Indexes->getInstructionIndex(I); + OS << '\t'; + } OS << '\t'; I->print(OS, &getParent()->getTarget()); } // Print the successors of this block according to the CFG. if (!succ_empty()) { + if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) OS << " BB#" << (*SI)->getNumber(); @@ -431,14 +462,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere - // We may need to update this's terminator, but we can't do that if AnalyzeBranch - // fails. If this uses a jump table, we won't touch it. + // We may need to update this's terminator, but we can't do that if + // AnalyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) return NULL; + // Avoid bugpoint weirdness: A block may end with a conditional branch but + // jumps to the same MBB is either case. We have duplicate CFG edges in that + // case that we can't handle. Since this never happens in properly optimized + // code, just skip those edges. 
+ if (TBB && TBB == FBB) { + DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" + << getNumber() << '\n'); + return NULL; + } + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); DEBUG(dbgs() << "Splitting critical edge:" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 272b54dea1fa..07a7d27b019f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -22,15 +22,18 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); -STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); +STATISTIC(NumPhysCSEs, + "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { class MachineCSE : public MachineFunctionPass { @@ -41,7 +44,9 @@ namespace { MachineRegisterInfo *MRI; public: static char ID; // Pass identification - MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + initializeMachineCSEPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -61,10 +66,13 @@ namespace { private: const unsigned LookAheadLimit; - typedef ScopedHashTableScope<MachineInstr*, unsigned, - MachineInstrExpressionTrait> ScopeType; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; + typedef ScopedHashTable<MachineInstr*, unsigned, + MachineInstrExpressionTrait, AllocatorTy> ScopedHTType; + typedef ScopedHTType::ScopeTy ScopeType; DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; - ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; + ScopedHTType VNT; SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; @@ -72,11 +80,11 @@ namespace { bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const ; - bool hasLivePhysRegDefUse(const MachineInstr *MI, - const MachineBasicBlock *MBB, - unsigned &PhysDef) const; - bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, - unsigned PhysDef) const; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI); @@ -91,8 +99,12 @@ namespace { } // end anonymous namespace char MachineCSE::ID = 0; -INITIALIZE_PASS(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false); +INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } @@ -104,7 +116,7 @@ bool 
MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (!MRI->hasOneNonDBGUse(Reg)) // Only coalesce single use copies. This ensure the copy will be @@ -120,17 +132,12 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, continue; if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) continue; - const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); - if (!NewRC) + if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); + DEBUG(dbgs() << "*** to: " << *MI); MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - if (NewRC != SRC) - MRI->setRegClass(SrcReg, NewRC); DefMI->eraseFromParent(); ++NumCoalesces; Changed = true; @@ -176,14 +183,14 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return false; } -/// hasLivePhysRegDefUse - Return true if the specified instruction read / write +/// hasLivePhysRegDefUses - Return true if the specified instruction read/write /// physical registers (except for dead defs of physical registers). It also /// returns the physical register def by reference if it's the only one and the /// instruction does not uses a physical register. -bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, - const MachineBasicBlock *MBB, - unsigned &PhysDef) const { - PhysDef = 0; +bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs) const { + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) @@ -193,35 +200,22 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) { - // Can't touch anything to read a physical register. - PhysDef = 0; - return true; - } - if (MO.isDead()) - // If the def is dead, it's ok. - continue; - // Ok, this is a physical register def that's not marked "dead". That's + // If the def is dead, it's ok. But the def may not marked "dead". That's // common since this pass is run before livevariables. We can scan // forward a few instructions and check if it is obviously dead. - if (PhysDef) { - // Multiple physical register defs. These are rare, forget about it. - PhysDef = 0; - return true; - } - PhysDef = Reg; + if (MO.isDef() && + (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) + continue; + PhysRefs.insert(Reg); + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + PhysRefs.insert(*Alias); } - if (PhysDef) { - MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); - if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) - return true; - } - return false; + return !PhysRefs.empty(); } -bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, - unsigned PhysDef) const { +bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. 
MachineBasicBlock *MBB = MI->getParent(); @@ -237,8 +231,17 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, if (I == E) return true; - if (I->modifiesRegister(PhysDef, TRI)) - return false; + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + continue; + if (PhysRefs.count(MOReg)) + return false; + } --LookAheadLeft; ++I; @@ -259,7 +262,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // Ignore stuff that we obviously can't move. const TargetInstrDesc &TID = MI->getDesc(); if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) + MI->hasUnmodeledSideEffects()) return false; if (TID.mayLoad()) { @@ -281,14 +284,13 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. - // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an - // immediate predecessor. We don't want to increase register pressure and end up - // causing other computation to be spilled. + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in + // an immediate predecessor. We don't want to increase register pressure and + // end up causing other computation to be spilled. if (MI->getDesc().isAsCheapAsAMove()) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); - if (CSBB != BB && - find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) + if (CSBB != BB && !CSBB->isSuccessor(BB)) return false; } @@ -297,7 +299,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, bool HasVRegUse = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() && + if (MO.isReg() && MO.isUse() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { HasVRegUse = true; break; @@ -359,7 +361,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!isCSECandidate(MI)) continue; - bool DefPhys = false; bool FoundCSE = VNT.count(MI); if (!FoundCSE) { // Look for trivial copy coalescing opportunities. @@ -370,24 +371,37 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { FoundCSE = VNT.count(MI); } } - // FIXME: commute commutable instructions? - // If the instruction defines a physical register and the value *may* be + // Commute commutable instructions. + bool Commuted = false; + if (!FoundCSE && MI->getDesc().isCommutable()) { + MachineInstr *NewMI = TII->commuteInstruction(MI); + if (NewMI) { + Commuted = true; + FoundCSE = VNT.count(NewMI); + if (NewMI != MI) + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + else if (!FoundCSE) + // MI was changed but it didn't help, commute it back! + (void)TII->commuteInstruction(MI); + } + } + + // If the instruction defines physical registers and the values *may* be // used, then it's not safe to replace it with a common subexpression. - unsigned PhysDef = 0; - if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { + // It's also not safe if the instruction uses physical registers. + SmallSet<unsigned,8> PhysRefs; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) { FoundCSE = false; // ... 
Unless the CS is local and it also defines the physical register - // which is not clobbered in between. - if (PhysDef) { - unsigned CSVN = VNT.lookup(MI); - MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefReaches(CSMI, MI, PhysDef)) { - FoundCSE = true; - DefPhys = true; - } - } + // which is not clobbered in between and the physical register uses + // were not clobbered. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + FoundCSE = true; } if (!FoundCSE) { @@ -432,8 +446,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { } MI->eraseFromParent(); ++NumCSEs; - if (DefPhys) + if (!PhysRefs.empty()) ++NumPhysCSEs; + if (Commuted) + ++NumCommutes; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 3c674789244a..04c8ecbf9bdc 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -25,7 +25,7 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); char MachineDominatorTree::ID = 0; INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", - "MachineDominator Tree Construction", true, true); + "MachineDominator Tree Construction", true, true) char &llvm::MachineDominatorsID = MachineDominatorTree::ID; @@ -42,6 +42,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { MachineDominatorTree::MachineDominatorTree() : MachineFunctionPass(ID) { + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); DT = new DominatorTreeBase<MachineBasicBlock>(false); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 017170076ceb..85532407ca43 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -33,7 +33,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/GraphWriter.h" @@ -52,14 +52,15 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { } MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &mmi) - : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) { + unsigned FunctionNum, MachineModuleInfo &mmi, + GCModuleInfo* gmi) + : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); else RegInfo = 0; MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo()); + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering()); if (Fn->hasFnAttr(Attribute::StackAlignment)) FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); @@ -190,20 +191,21 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { } MachineMemOperand * -MachineFunction::getMachineMemOperand(const Value *v, unsigned f, - int64_t o, uint64_t s, - unsigned base_alignment) { - return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment); +MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, + uint64_t s, unsigned base_alignment, + const MDNode *TBAAInfo) { + return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, + TBAAInfo); } 
MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { return new (Allocator) - MachineMemOperand(MMO->getValue(), MMO->getFlags(), - int64_t(uint64_t(MMO->getOffset()) + - uint64_t(Offset)), - Size, MMO->getBaseAlignment()); + MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment(), 0); } MachineInstr::mmo_iterator @@ -231,10 +233,10 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, else { // Clone the MMO and unset the store flag. MachineMemOperand *JustLoad = - getMachineMemOperand((*I)->getValue(), + getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, - (*I)->getOffset(), (*I)->getSize(), - (*I)->getBaseAlignment()); + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getTBAAInfo()); Result[Index] = JustLoad; } ++Index; @@ -263,10 +265,10 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, else { // Clone the MMO and unset the load flag. MachineMemOperand *JustStore = - getMachineMemOperand((*I)->getValue(), + getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, - (*I)->getOffset(), (*I)->getSize(), - (*I)->getBaseAlignment()); + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getTBAAInfo()); Result[Index] = JustStore; } ++Index; @@ -279,7 +281,7 @@ void MachineFunction::dump() const { print(dbgs()); } -void MachineFunction::print(raw_ostream &OS) const { +void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << "# Machine code for function " << Fn->getName() << ":\n"; // Print Frame Information @@ -328,7 +330,7 @@ void MachineFunction::print(raw_ostream &OS) const { for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; - BB->print(OS); + BB->print(OS, Indexes); } OS << "\n# End machine code for function " << Fn->getName() << ".\n\n"; @@ -346,17 +348,15 @@ namespace llvm { std::string getNodeLabel(const MachineBasicBlock *Node, const MachineFunction *Graph) { - if (isSimple () && Node->getBasicBlock() && - !Node->getBasicBlock()->getName().empty()) - return Node->getBasicBlock()->getNameStr() + ":"; - std::string OutStr; { raw_string_ostream OSS(OutStr); - - if (isSimple()) - OSS << Node->getNumber() << ':'; - else + + if (isSimple()) { + OSS << "BB#" << Node->getNumber(); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else Node->print(OSS); } @@ -396,7 +396,8 @@ void MachineFunction::viewCFGOnly() const /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, - const TargetRegisterClass *RC) { + const TargetRegisterClass *RC, + DebugLoc DL) { MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -405,6 +406,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, } VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(PReg, VReg); + MRI.addLiveInLoc(VReg, DL); return VReg; } @@ -426,6 +428,13 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, return Ctx.GetOrCreateSymbol(Name.str()); } +/// getPICBaseSymbol - Return a function-local symbol to represent the PIC +/// base. 
+MCSymbol *MachineFunction::getPICBaseSymbol() const { + const MCAsmInfo &MAI = *Target.getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(getFunctionNumber())+"$pb"); +} //===----------------------------------------------------------------------===// // MachineFrameInfo implementation @@ -485,7 +494,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; - const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); + const TargetFrameLowering *FI = MF.getTarget().getFrameLowering(); int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); OS << "Frame Objects:\n"; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 4f84b952e061..054c750c9f2b 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -12,22 +12,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" using namespace llvm; -// Register this pass with PassInfo directly to avoid having to define -// a default constructor. -static PassInfo -X("Machine Function Analysis", "machine-function-analysis", - &MachineFunctionAnalysis::ID, 0, - /*CFGOnly=*/false, /*is_analysis=*/true); - char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, CodeGenOpt::Level OL) : FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineFunctionAnalysis::~MachineFunctionAnalysis() { @@ -52,7 +47,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis<MachineModuleInfo>()); + getAnalysis<MachineModuleInfo>(), + getAnalysisIfAvailable<GCModuleInfo>()); return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 446e461d5460..aa9ea61acec7 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -102,13 +102,13 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { RemoveRegOperandFromRegInfo(); - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; AddRegOperandToRegInfo(&MF->getRegInfo()); return; } // Otherwise, just change the register, no problem. :) - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; } void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, @@ -159,7 +159,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, } else { // Otherwise, change this to a register and set the reg#. OpKind = MO_Register; - Contents.Reg.RegNo = Reg; + SmallContents.RegNo = Reg; // If this operand is embedded in a function, add the operand to the // register's use/def list. @@ -227,24 +227,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); + const TargetRegisterInfo *TRI = TM ? 
TM->getRegisterInfo() : 0; switch (getType()) { case MachineOperand::MO_Register: - if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) { - OS << "%reg" << getReg(); - } else { - if (TM) - OS << "%" << TM->getRegisterInfo()->get(getReg()).Name; - else - OS << "%physreg" << getReg(); - } - - if (getSubReg() != 0) { - if (TM) - OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg()); - else - OS << ':' << getSubReg(); - } + OS << PrintReg(getReg(), TRI, getSubReg()); if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || isEarlyClobber()) { @@ -335,10 +322,45 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { // MachineMemOperand Implementation //===----------------------------------------------------------------------===// -MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, - int64_t o, uint64_t s, unsigned int a) - : Offset(o), Size(s), V(v), - Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { +/// getAddrSpace - Return the LLVM IR address space number that this pointer +/// points into. +unsigned MachinePointerInfo::getAddrSpace() const { + if (V == 0) return 0; + return cast<PointerType>(V->getType())->getAddressSpace(); +} + +/// getConstantPool - Return a MachinePointerInfo record that refers to the +/// constant pool. +MachinePointerInfo MachinePointerInfo::getConstantPool() { + return MachinePointerInfo(PseudoSourceValue::getConstantPool()); +} + +/// getFixedStack - Return a MachinePointerInfo record that refers to the +/// the specified FrameIndex. +MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { + return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable() { + return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT() { + return MachinePointerInfo(PseudoSourceValue::getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { + return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, + uint64_t s, unsigned int a, + const MDNode *TBAAInfo) + : PtrInfo(ptrinfo), Size(s), + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), + TBAAInfo(TBAAInfo) { + assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && + "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -346,9 +368,9 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, /// Profile - Gather unique data for the object. /// void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(Offset); + ID.AddInteger(getOffset()); ID.AddInteger(Size); - ID.AddPointer(V); + ID.AddPointer(getValue()); ID.AddInteger(Flags); } @@ -364,8 +386,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. - V = MMO->getValue(); - Offset = MMO->getOffset(); + PtrInfo = MMO->PtrInfo; } } @@ -410,6 +431,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { MMO.getBaseAlignment() != MMO.getSize()) OS << "(align=" << MMO.getAlignment() << ")"; + // Print TBAA info. 
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false); + else + OS << "<unknown>"; + OS << ")"; + } + return OS; } @@ -782,6 +813,14 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } +bool MachineInstr::isStackAligningInlineAsm() const { + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + return true; + } + return false; +} /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of /// the specific register or -1 if it is not found. It further tightens @@ -881,14 +920,15 @@ int MachineInstr::findFirstPredOperandIdx() const { bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { if (isInlineAsm()) { - assert(DefOpIdx >= 3); + assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; unsigned DefPart = 0; - for (unsigned i = 2, e = getNumOperands(); i < e; ) { + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i < e; ) { const MachineOperand &FMO = getOperand(i); // After the normal asm operands there may be additional imp-def regs. if (!FMO.isImm()) @@ -903,7 +943,8 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } ++DefNo; } - for (unsigned i = 2, e = getNumOperands(); i != e; ++i) { + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i != e; ++i) { const MachineOperand &FMO = getOperand(i); if (!FMO.isImm()) continue; @@ -946,7 +987,8 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { // Find the flag operand corresponding to UseOpIdx unsigned FlagIdx, NumOps=0; - for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + for (FlagIdx = InlineAsm::MIOp_FirstOperand; + FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { const MachineOperand &UFMO = getOperand(FlagIdx); // After the normal asm operands there may be additional imp-def regs. if (!UFMO.isImm()) @@ -964,9 +1006,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { if (!DefOpIdx) return true; - unsigned DefIdx = 2; + unsigned DefIdx = InlineAsm::MIOp_FirstOperand; // Remember to adjust the index. First operand is asm string, second is - // the AlignStack bit, then there is a flag for each. + // the HasSideEffects and AlignStack bits, then there is a flag for each. while (DefNo) { const MachineOperand &FMO = getOperand(DefIdx); assert(FMO.isImm()); @@ -1071,7 +1113,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, SawStore = true; return false; } - if (TID->isTerminator() || TID->hasUnmodeledSideEffects()) + + if (isLabel() || isDebugValue() || + TID->isTerminator() || hasUnmodeledSideEffects()) return false; // See if this instruction does a load. 
If so, we have to guarantee that the @@ -1122,7 +1166,7 @@ bool MachineInstr::hasVolatileMemoryRef() const { if (!TID->mayStore() && !TID->mayLoad() && !TID->isCall() && - !TID->hasUnmodeledSideEffects()) + !hasUnmodeledSideEffects()) return false; // Otherwise, if the instruction has no memory reference information, @@ -1166,7 +1210,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (PSV->isConstant(MFI)) continue; // If we have an AliasAnalysis, ask it whether the memory is constant. - if (AA && AA->pointsToConstantMemory(V)) + if (AA && AA->pointsToConstantMemory( + AliasAnalysis::Location(V, (*I)->getSize(), + (*I)->getTBAAInfo()))) continue; } @@ -1194,6 +1240,18 @@ unsigned MachineInstr::isConstantValuePHI() const { return Reg; } +bool MachineInstr::hasUnmodeledSideEffects() const { + if (getDesc().hasUnmodeledSideEffects()) + return true; + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + return true; + } + + return false; +} + /// allDefsAreDead - Return true if all the defs of this instruction are dead. /// bool MachineInstr::allDefsAreDead() const { @@ -1207,6 +1265,17 @@ bool MachineInstr::allDefsAreDead() const { return true; } +/// copyImplicitOps - Copy implicit register operands from specified +/// instruction to this instruction. +void MachineInstr::copyImplicitOps(const MachineInstr *MI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + addOperand(MO); + } +} + void MachineInstr::dump() const { dbgs() << " " << *this; } @@ -1257,7 +1326,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (StartOp != 0) OS << ", "; getOperand(StartOp).print(OS, TM); unsigned Reg = getOperand(StartOp).getReg(); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) VirtRegs.push_back(Reg); } @@ -1270,11 +1339,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; + + if (isInlineAsm()) { + // Print asm string. + OS << " "; + getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + + // Print HasSideEffects, IsAlignStack + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + OS << " [alignstack]"; + + StartOp = InlineAsm::MIOp_FirstOperand; + FirstOp = false; + } + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegs.push_back(MO.getReg()); // Omit call-clobbered registers which aren't used anywhere. 
This makes @@ -1284,7 +1370,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (MF && getDesc().isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { bool HasAliasLive = false; @@ -1348,14 +1434,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << RC->getName() << ":%reg" << VirtRegs[i]; + OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { ++j; continue; } if (VirtRegs[i] != VirtRegs[j]) - OS << "," << VirtRegs[j]; + OS << "," << PrintReg(VirtRegs[j]); VirtRegs.erase(VirtRegs.begin()+j); } } @@ -1533,8 +1619,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { switch (MO.getType()) { default: break; case MachineOperand::MO_Register: - if (MO.isDef() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) + if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. Key |= MO.getReg(); break; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 1a74b747e9f2..443fc2d97bdf 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,8 +28,10 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" @@ -40,8 +42,14 @@ using namespace llvm; -STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); -STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed"); +STATISTIC(NumHoisted, + "Number of machine instructions hoisted out of loops"); +STATISTIC(NumLowRP, + "Number of instructions hoisted in low reg pressure situation"); +STATISTIC(NumHighLatency, + "Number of high latency instructions hoisted"); +STATISTIC(NumCSEed, + "Number of hoisted machine instructions CSEed"); STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); @@ -51,9 +59,11 @@ namespace { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetLowering *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; - MachineRegisterInfo *RegInfo; + MachineRegisterInfo *MRI; + const InstrItineraryData *InstrItins; // Various analyses that we use... AliasAnalysis *AA; // Alias analysis info. @@ -68,23 +78,37 @@ namespace { BitVector AllocatableSet; + // Track 'estimated' register pressure. + SmallSet<unsigned, 32> RegSeen; + SmallVector<unsigned, 8> RegPressure; + + // Register pressure "limit" per register class. If the pressure + // is higher than the limit, then it's considered high. + SmallVector<unsigned, 8> RegLimit; + + // Register pressure on path leading from loop preheader to current BB. 
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; + // For each opcode, keep a list of potential CSE instructions. DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; public: static char ID; // Pass identification, replacement for typeid MachineLICM() : - MachineFunctionPass(ID), PreRegAlloc(true) {} + MachineFunctionPass(ID), PreRegAlloc(true) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } explicit MachineLICM(bool PreRA) : - MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + MachineFunctionPass(ID), PreRegAlloc(PreRA) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); const char *getPassName() const { return "Machine Instruction LICM"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AliasAnalysis>(); @@ -94,6 +118,13 @@ namespace { } virtual void releaseMemory() { + RegSeen.clear(); + RegPressure.clear(); + RegLimit.clear(); + BackTrace.clear(); + for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator + CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) + CI->second.clear(); CSEMap.clear(); } @@ -138,6 +169,24 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' + /// and an use in the current loop, return true if the target considered + /// it 'high'. + bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, + unsigned Reg) const; + + bool IsCheapInstruction(MachineInstr &MI) const; + + /// CanCauseHighRegPressure - Visit BBs from header to current BB, + /// check if hoisting an instruction of the given cost matrix can cause high + /// register pressure. + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + + /// UpdateBackTraceRegPressure - Traverse the back trace from header to + /// the current block and update their register pressures to reflect the + /// effect of hoisting MI from the current block to the preheader. + void UpdateBackTraceRegPressure(const MachineInstr *MI); + /// IsProfitableToHoist - Return true if it is potentially profitable to /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); @@ -148,11 +197,16 @@ namespace { /// visit definitions before uses, allowing us to hoist a loop body in one /// pass without iteration. /// - void HoistRegion(MachineDomTreeNode *N); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false); + + /// InitRegPressure - Find all virtual register references that are liveout + /// of the preheader to initialize the starting "register pressure". Note + /// this does not count live through (livein but not used) registers. + void InitRegPressure(MachineBasicBlock *BB); - /// isLoadFromConstantMemory - Return true if the given instruction is a - /// load from constant memory. - bool isLoadFromConstantMemory(MachineInstr *MI); + /// UpdateRegPressure - Update estimate of register pressure after the + /// specified instruction. + void UpdateRegPressure(const MachineInstr *MI); /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable @@ -174,8 +228,8 @@ namespace { /// Hoist - When an instruction is found to only use loop invariant operands /// that is safe to hoist, this instruction is called to do the dirty work. 
- /// - void Hoist(MachineInstr *MI); + /// It returns true if the instruction is hoisted. + bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); /// InitCSEMap - Initialize the CSE map with instructions that are in the /// current loop preheader that may become duplicates of instructions that @@ -189,8 +243,13 @@ namespace { } // end anonymous namespace char MachineLICM::ID = 0; -INITIALIZE_PASS(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false); +INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); @@ -212,18 +271,32 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (PreRegAlloc) - DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n"); + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); else - DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); + TLI = TM->getTargetLowering(); TRI = TM->getRegisterInfo(); MFI = MF.getFrameInfo(); - RegInfo = &MF.getRegInfo(); + MRI = &MF.getRegInfo(); + InstrItins = TM->getInstrItineraryData(); AllocatableSet = TRI->getAllocatableSet(MF); + if (PreRegAlloc) { + // Estimate register pressure during pre-regalloc pass. + unsigned NumRC = TRI->getNumRegClasses(); + RegPressure.resize(NumRC); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + RegLimit.resize(NumRC); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); + } + // Get our Loop information... MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); @@ -248,7 +321,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); FirstInLoop = true; - HoistRegion(N); + HoistRegion(N, true); CSEMap.clear(); } } @@ -474,17 +547,33 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// -void MachineLICM::HoistRegion(MachineDomTreeNode *N) { +void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { assert(N != 0 && "Null dominator tree node?"); MachineBasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. if (!CurLoop->contains(BB)) return; + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + if (IsHeader) { + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + } + + // Remember livein register pressure. 
+ BackTrace.push_back(RegPressure); + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; - Hoist(&*MII); + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); MII = NextMII; } @@ -496,6 +585,99 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { for (unsigned I = 0, E = Children.size(); I != E; ++I) HoistRegion(Children[I]); } + + BackTrace.pop_back(); +} + +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + +/// InitRegPressure - Find all virtual register references that are liveout of +/// the preheader to initialize the starting "register pressure". Note this +/// does not count live through (livein but not used) registers. +void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { + std::fill(RegPressure.begin(), RegPressure.end(), 0); + + // If the preheader has only a single predecessor and it ends with a + // fallthrough or an unconditional branch, then scan its predecessor for live + // defs as well. This happens whenever the preheader is created by splitting + // the critical edge from the loop predecessor to the loop header. + if (BB->pred_size() == 1) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) + InitRegPressure(*BB->pred_begin()); + } + + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); + MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (MO.isDef()) + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill) + // Haven't seen this, it must be a livein. + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else if (!isNew && isKill) + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + } +} + +/// UpdateRegPressure - Update estimate of register pressure after the +/// specified instruction. 
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + if (MO.isDef()) + Defs.push_back(Reg); + else if (!isNew && isOperandKill(MO, MRI)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + + if (RCCost > RegPressure[RCId]) + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= RCCost; + } + } + + while (!Defs.empty()) { + unsigned Reg = Defs.pop_back_val(); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] += RCCost; + } } /// IsLICMCandidate - Returns true if the instruction may be a suitable @@ -535,14 +717,14 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!RegInfo->def_empty(Reg)) + if (!MRI->def_empty(Reg)) return false; if (AllocatableSet.test(Reg)) return false; // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (!RegInfo->def_empty(AliasReg)) + if (!MRI->def_empty(AliasReg)) return false; if (AllocatableSet.test(AliasReg)) return false; @@ -562,12 +744,12 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (!MO.isUse()) continue; - assert(RegInfo->getVRegDef(Reg) && + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); // If the loop contains the definition of an operand, then the instruction // isn't loop invariant. - if (CurLoop->contains(RegInfo->getVRegDef(Reg))) + if (CurLoop->contains(MRI->getVRegDef(Reg))) return false; } @@ -577,9 +759,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { /// HasPHIUses - Return true if the specified register has any PHI use. -static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { - for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg), - UE = RegInfo->use_end(); UI != UE; ++UI) { +static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; if (UseMI->isPHI()) return true; @@ -587,37 +769,210 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { return false; } -/// isLoadFromConstantMemory - Return true if the given instruction is a -/// load from constant memory. Machine LICM will hoist these even if they are -/// not re-materializable. 
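// Hedged sketch of the pressure update rule implemented above: a def raises
// the pressure of its register class by the class cost, and a use that kills
// its register lowers it, never going below zero. The real code also tracks
// first-seen registers and applies defs after all uses; that is elided here,
// and every identifier below is illustrative only.
#include <algorithm>
#include <cstddef>
#include <vector>

struct OperandModel { unsigned RC; bool IsDef; bool IsKill; };

static void updatePressure(std::vector<int> &Pressure,
                           const std::vector<OperandModel> &Ops,
                           int ClassCost) {
  for (std::size_t i = 0; i != Ops.size(); ++i) {
    if (Ops[i].IsDef)
      Pressure[Ops[i].RC] += ClassCost;             // a new value becomes live
    else if (Ops[i].IsKill)
      Pressure[Ops[i].RC] =
          std::max(0, Pressure[Ops[i].RC] - ClassCost);  // last use frees it
  }
}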
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { - if (!MI->getDesc().mayLoad()) return false; - if (!MI->hasOneMemOperand()) return false; - MachineMemOperand *MMO = *MI->memoperands_begin(); - if (MMO->isVolatile()) return false; - if (!MMO->getValue()) return false; - const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue()); - if (PSV) { - MachineFunction &MF = *MI->getParent()->getParent(); - return PSV->isConstant(MF.getFrameInfo()); - } else { - return AA->pointsToConstantMemory(MMO->getValue()); + +/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' +/// and an use in the current loop, return true if the target considered +/// it 'high'. +bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) const { + if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) + return false; + + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->isCopyLike()) + continue; + if (!CurLoop->contains(UseMI->getParent())) + continue; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + return true; + } + + // Only look at the first in loop use. + break; + } + + return false; +} + +/// IsCheapInstruction - Return true if the instruction is marked "cheap" or +/// the operand latency between its def and a use is one or less. +bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { + if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + return true; + if (!InstrItins || InstrItins->isEmpty()) + return false; + + bool isCheap = false; + unsigned NumDefs = MI.getDesc().getNumDefs(); + for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &DefMO = MI.getOperand(i); + if (!DefMO.isReg() || !DefMO.isDef()) + continue; + --NumDefs; + unsigned Reg = DefMO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + if (!TII->hasLowDefLatency(InstrItins, &MI, i)) + return false; + isCheap = true; + } + + return isCheap; +} + +/// CanCauseHighRegPressure - Visit BBs from header to current BB, check +/// if hoisting an instruction of the given cost matrix can cause high +/// register pressure. +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { + for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); + CI != CE; ++CI) { + if (CI->second <= 0) + continue; + + unsigned RCId = CI->first; + for (unsigned i = BackTrace.size(); i != 0; --i) { + SmallVector<unsigned, 8> &RP = BackTrace[i-1]; + if (RP[RCId] + CI->second >= RegLimit[RCId]) + return true; + } + } + + return false; +} + +/// UpdateBackTraceRegPressure - Traverse the back trace from header to the +/// current block and update their register pressures to reflect the effect +/// of hoisting MI from the current block to the preheader. +void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + // First compute the 'cost' of the instruction, i.e. its contribution + // to register pressure. 
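// Simplified sketch of the CanCauseHighRegPressure check defined above: walk
// the pressure snapshots recorded from the loop header down to the current
// block and test whether adding the candidate's cost would reach the class
// limit in any of them. Containers and names are stand-ins, not the LLVM ones.
#include <cstddef>
#include <map>
#include <vector>

static bool wouldExceedLimit(const std::vector<std::vector<int> > &BackTrace,
                             const std::map<unsigned, int> &Cost,
                             const std::vector<int> &RegLimit) {
  for (std::map<unsigned, int>::const_iterator CI = Cost.begin(),
         CE = Cost.end(); CI != CE; ++CI) {
    if (CI->second <= 0)
      continue;                                 // this class only gets cheaper
    for (std::size_t i = 0; i != BackTrace.size(); ++i)
      if (BackTrace[i][CI->first] + CI->second >= RegLimit[CI->first])
        return true;                   // some block on the path gets too hot
  }
  return false;
}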
+ DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + if (MO.isDef()) { + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second += RCCost; + else + Cost.insert(std::make_pair(RCId, RCCost)); + } else if (isOperandKill(MO, MRI)) { + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second -= RCCost; + else + Cost.insert(std::make_pair(RCId, -RCCost)); + } + } + + // Update register pressure of blocks from loop header to current block. + for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { + SmallVector<unsigned, 8> &RP = BackTrace[i]; + for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); + CI != CE; ++CI) { + unsigned RCId = CI->first; + RP[RCId] += CI->second; + } } } /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. + if (MI.isImplicitDef()) + return true; + + // If the instruction is cheap, only hoist if it is re-materilizable. LICM + // will increase register pressure. It's probably not worth it if the + // instruction is cheap. // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting // these tend to help performance in low register pressure situation. The // trade off is it may cause spill in high pressure situation. It will end up // adding a store in the loop preheader. But the reload is no more expensive. // The side benefit is these loads are frequently CSE'ed. - if (!TII->isTriviallyReMaterializable(&MI, AA)) { - if (!isLoadFromConstantMemory(&MI)) + if (IsCheapInstruction(MI)) { + if (!TII->isTriviallyReMaterializable(&MI, AA)) + return false; + } else { + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? 
+ DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + ++NumHighLatency; + return true; + } + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second += RCCost; + else + Cost.insert(std::make_pair(RCId, RCCost)); + } else if (isOperandKill(MO, MRI)) { + // Is a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second -= RCCost; + else + Cost.insert(std::make_pair(RCId, -RCCost)); + } + } + + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost)) { + ++NumLowRP; + return true; + } + + // High register pressure situation, only hoist if the instruction is going to + // be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) return false; } @@ -628,7 +983,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - if (HasPHIUses(MO.getReg(), RegInfo)) + if (HasPHIUses(MO.getReg(), MRI)) return false; } @@ -636,10 +991,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { } MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { + // Don't unfold simple loads. + if (MI->getDesc().canFoldAsLoad()) + return 0; + // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!isLoadFromConstantMemory(MI)) + if (!MI->isInvariantLoad(AA)) return 0; // Next determine the register class for a temporary register. @@ -654,7 +1013,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (TID.getNumDefs() != 1) return 0; const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. - unsigned Reg = RegInfo->createVirtualRegister(RC); + unsigned Reg = MRI->createVirtualRegister(RC); MachineFunction &MF = *MI->getParent()->getParent(); SmallVector<MachineInstr *, 2> NewMIs; @@ -678,6 +1037,10 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { NewMIs[1]->eraseFromParent(); return 0; } + + // Update register pressure for the unfolded instruction. + UpdateRegPressure(NewMIs[1]); + // Otherwise we successfully unfolded a load that we can hoist. 
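// A condensed, assumption-laden restatement of the profitability policy above,
// written as one small decision function. The field and function names are
// invented for illustration; the real pass derives these facts from the
// instruction, the itineraries, and the back-trace pressure estimate.
struct HoistCandidate {
  bool HighLatencyDef;        // a def feeds a long-latency use inside the loop
  bool Cheap;                 // "as cheap as a move" or low def latency
  bool Rematerializable;      // trivially re-materializable or invariant load
  bool RaisesPressureTooMuch; // result of the back-trace pressure check
};

static bool worthHoisting(const HoistCandidate &C) {
  if (C.HighLatencyDef)
    return true;                      // long latency: hoist even under pressure
  if (C.Cheap)
    return C.Rematerializable;        // cheap: only when it can be re-matted
  if (!C.RaisesPressureTooMuch)
    return true;                      // low pressure: hoisting is fine
  return C.Rematerializable;          // high pressure: only re-mattable values
}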
MI->eraseFromParent(); return NewMIs[0]; @@ -686,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; - // FIXME: For now, only hoist re-materilizable instructions. LICM will - // increase register pressure. We want to make sure it doesn't increase - // spilling. - if (TII->isTriviallyReMaterializable(MI, AA)) { - unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator - CI = CSEMap.find(Opcode); - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else { - std::vector<const MachineInstr*> CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector<const MachineInstr*> CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); } } } @@ -709,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->produceSameValue(MI, PrevMI)) + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) return PrevMI; } return 0; @@ -738,8 +1096,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, if (MO.isReg() && MO.isDef() && !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); - RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); + MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + MRI->clearKillFlags(Dup->getOperand(i).getReg()); } } MI->eraseFromParent(); @@ -752,15 +1110,12 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// Hoist - When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// -void MachineLICM::Hoist(MachineInstr *MI) { - MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; - +bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. MI = ExtractHoistableLoad(MI); - if (!MI) return; + if (!MI) return false; } // Now move the instructions to the predecessor, inserting it before any @@ -791,13 +1146,16 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Otherwise, splice the instruction to the preheader. Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); + // Update register pressure for BBs from header to this block. + UpdateBackTraceRegPressure(MI); + // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop // rather than just live for part of it. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && !MO.isDead()) - RegInfo->clearKillFlags(MO.getReg()); + MRI->clearKillFlags(MO.getReg()); } // Add to the CSE map. 
@@ -812,6 +1170,8 @@ void MachineLICM::Hoist(MachineInstr *MI) { ++NumHoisted; Changed = true; + + return true; } MachineBasicBlock *MachineLICM::getCurPreheader() { diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index bca4b0c28985..189cb2ba5d1d 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -30,8 +30,11 @@ TEMPLATE_INSTANTIATION(MLIB); } char MachineLoopInfo::ID = 0; -INITIALIZE_PASS(MachineLoopInfo, "machine-loops", - "Machine Natural Loop Construction", true, true); +INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp new file mode 100644 index 000000000000..17fe67f65045 --- /dev/null +++ b/lib/CodeGen/MachineLoopRanges.cpp @@ -0,0 +1,116 @@ +//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the MachineLoopRanges analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +char MachineLoopRanges::ID = 0; +INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges", + "Machine Loop Ranges", true, true) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges", + "Machine Loop Ranges", true, true) + +char &llvm::MachineLoopRangesID = MachineLoopRanges::ID; + +void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<SlotIndexes>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// runOnMachineFunction - Don't do much, loop ranges are computed on demand. +bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + Indexes = &getAnalysis<SlotIndexes>(); + return false; +} + +void MachineLoopRanges::releaseMemory() { + DeleteContainerSeconds(Cache); + Cache.clear(); +} + +MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) { + MachineLoopRange *&Range = Cache[Loop]; + if (!Range) + Range = new MachineLoopRange(Loop, Allocator, *Indexes); + return Range; +} + +/// Create a MachineLoopRange, only accessible to MachineLoopRanges. +MachineLoopRange::MachineLoopRange(const MachineLoop *loop, + MachineLoopRange::Allocator &alloc, + SlotIndexes &Indexes) + : Loop(loop), Intervals(alloc), Area(0) { + // Compute loop coverage. + for (MachineLoop::block_iterator I = Loop->block_begin(), + E = Loop->block_end(); I != E; ++I) { + const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I); + Intervals.insert(Range.first, Range.second, 1u); + Area += Range.first.distance(Range.second); + } +} + +/// overlaps - Return true if this loop overlaps the given range of machine +/// instructions. 
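// Standalone sketch of what the new MachineLoopRange constructor computes: the
// per-block [start, stop) index ranges of the loop and their total "area"
// (number of covered slots), plus the overlap query defined just below. Plain
// unsigned indices and a vector stand in for SlotIndex and the IntervalMap.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<unsigned, unsigned> > Blocks;   // [start, stop) per block
  Blocks.push_back(std::make_pair(16u, 48u));
  Blocks.push_back(std::make_pair(48u, 80u));

  unsigned Area = 0;
  for (std::size_t i = 0; i != Blocks.size(); ++i)
    Area += Blocks[i].second - Blocks[i].first;

  unsigned Start = 70, Stop = 90;                       // query range
  bool Overlaps = false;
  for (std::size_t i = 0; i != Blocks.size(); ++i)
    if (Start < Blocks[i].second && Blocks[i].first < Stop)
      Overlaps = true;

  std::printf("area = %u, overlaps [%u;%u) = %d\n",
              Area, Start, Stop, (int)Overlaps);
  return 0;
}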
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) { + Map::const_iterator I = Intervals.find(Start); + return I.valid() && Stop > I.start(); +} + +unsigned MachineLoopRange::getNumber() const { + return Loop->getHeader()->getNumber(); +} + +/// byNumber - Comparator for array_pod_sort that sorts a list of +/// MachineLoopRange pointers by number. +int MachineLoopRange::byNumber(const void *pa, const void *pb) { + const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa); + const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb); + unsigned na = a->getNumber(); + unsigned nb = b->getNumber(); + if (na < nb) + return -1; + if (na > nb) + return 1; + return 0; +} + +/// byAreaDesc - Comparator for array_pod_sort that sorts a list of +/// MachineLoopRange pointers by: +/// 1. Descending area. +/// 2. Ascending number. +int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) { + const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa); + const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb); + if (a->getArea() != b->getArea()) + return a->getArea() > b->getArea() ? -1 : 1; + return byNumber(pa, pb); +} + +void MachineLoopRange::print(raw_ostream &OS) const { + OS << "Loop#" << getNumber() << " ="; + for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I) + OS << " [" << I.start() << ';' << I.stop() << ')'; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) { + MLR.print(OS); + return OS; +} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index b647a4dcc530..fadc594efcb2 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -29,7 +29,7 @@ using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use TargetData's. INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false); + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. @@ -41,30 +41,30 @@ class MMIAddrLabelMapCallbackPtr : CallbackVH { public: MMIAddrLabelMapCallbackPtr() : Map(0) {} MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} - + void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); } - + void setMap(MMIAddrLabelMap *map) { Map = map; } - + virtual void deleted(); virtual void allUsesReplacedWith(Value *V2); }; - + class MMIAddrLabelMap { MCContext &Context; struct AddrLabelSymEntry { /// Symbols - The symbols for the label. This is a pointer union that is /// either one symbol (the common case) or a list of symbols. PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols; - + Function *Fn; // The containing function of the BasicBlock. unsigned Index; // The index in BBCallbacks for the BasicBlock. }; - + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; - + /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We /// use this so we get notified if a block is deleted or RAUWd. std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; @@ -76,23 +76,23 @@ class MMIAddrLabelMap { DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > DeletedAddrLabelsNeedingEmission; public: - + MMIAddrLabelMap(MCContext &context) : Context(context) {} ~MMIAddrLabelMap() { assert(DeletedAddrLabelsNeedingEmission.empty() && "Some labels for deleted blocks never got emitted"); - + // Deallocate any of the 'list of symbols' case. 
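// Illustrative use of the two-key comparator style added above (descending
// area, then ascending number) with a plain qsort. The element type here is a
// flat struct rather than MachineLoopRange pointers, so it is only an analogy.
#include <cstdio>
#include <cstdlib>

struct LoopDesc { unsigned Number; unsigned Area; };

static int byAreaDescThenNumber(const void *pa, const void *pb) {
  const LoopDesc *a = static_cast<const LoopDesc *>(pa);
  const LoopDesc *b = static_cast<const LoopDesc *>(pb);
  if (a->Area != b->Area)
    return a->Area > b->Area ? -1 : 1;        // bigger area sorts first
  if (a->Number != b->Number)
    return a->Number < b->Number ? -1 : 1;    // then by ascending number
  return 0;
}

int main() {
  LoopDesc Loops[3] = { {7, 32}, {3, 64}, {5, 64} };
  std::qsort(Loops, 3, sizeof(LoopDesc), byAreaDescThenNumber);
  for (int i = 0; i != 3; ++i)
    std::printf("Loop#%u area=%u\n", Loops[i].Number, Loops[i].Area);
  return 0;
}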
for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I) if (I->second.Symbols.is<std::vector<MCSymbol*>*>()) delete I->second.Symbols.get<std::vector<MCSymbol*>*>(); } - + MCSymbol *getAddrLabelSymbol(BasicBlock *BB); std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB); - void takeDeletedSymbolsForFunction(Function *F, + void takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result); void UpdateForDeletedBlock(BasicBlock *BB); @@ -104,7 +104,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { assert(BB->hasAddressTaken() && "Shouldn't get label for block without address taken"); AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - + // If we already had an entry for this block, just return it. if (!Entry.Symbols.isNull()) { assert(BB->getParent() == Entry.Fn && "Parent changed"); @@ -112,7 +112,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) { return Entry.Symbols.get<MCSymbol*>(); return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0]; } - + // Otherwise, this is a new entry, create a new symbol for it and add an // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. BBCallbacks.push_back(BB); @@ -129,9 +129,9 @@ MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { assert(BB->hasAddressTaken() && "Shouldn't get label for block without address taken"); AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - + std::vector<MCSymbol*> Result; - + // If we already had an entry for this block, just return it. if (Entry.Symbols.isNull()) Result.push_back(getAddrLabelSymbol(BB)); @@ -152,7 +152,7 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { // If there are no entries for the function, just return. if (I == DeletedAddrLabelsNeedingEmission.end()) return; - + // Otherwise, take the list. std::swap(Result, I->second); DeletedAddrLabelsNeedingEmission.erase(I); @@ -175,7 +175,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) { if (Sym->isDefined()) return; - + // If the block is not yet defined, we need to emit it at the end of the // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list // for the containing Function. Since the block is being deleted, its @@ -187,7 +187,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { for (unsigned i = 0, e = Syms->size(); i != e; ++i) { MCSymbol *Sym = (*Syms)[i]; if (Sym->isDefined()) continue; // Ignore already emitted labels. - + // If the block is not yet defined, we need to emit it at the end of the // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list // for the containing Function. Since the block is being deleted, its @@ -195,7 +195,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // 'Entry'. DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); } - + // The entry is deleted, free the memory associated with the symbol list. delete Syms; } @@ -225,7 +225,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { SymList->push_back(PrevSym); NewEntry.Symbols = SymList; } - + std::vector<MCSymbol*> *SymList = NewEntry.Symbols.get<std::vector<MCSymbol*>*>(); @@ -234,7 +234,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { SymList->push_back(Sym); return; } - + // Otherwise, concatenate the list. 
std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>(); SymList->insert(SymList->end(), Syms->begin(), Syms->end()); @@ -253,10 +253,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { //===----------------------------------------------------------------------===// -MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) -: ImmutablePass(ID), Context(MAI), +MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, + const TargetAsmInfo *TAI) +: ImmutablePass(ID), Context(MAI, TAI), ObjFileMMI(0), - CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){ + CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false), + CallsExternalVAFunctionWithFloatingPointArguments(false) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); AddrLabelSymbols = 0; @@ -264,7 +267,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI) } MachineModuleInfo::MachineModuleInfo() -: ImmutablePass(ID), Context(*(MCAsmInfo*)0) { +: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) { assert(0 && "This MachineModuleInfo constructor should never be called, MMI " "should always be explicitly constructed by LLVMTargetMachine"); abort(); @@ -272,7 +275,7 @@ MachineModuleInfo::MachineModuleInfo() MachineModuleInfo::~MachineModuleInfo() { delete ObjFileMMI; - + // FIXME: Why isn't doFinalization being called?? //assert(AddrLabelSymbols == 0 && "doFinalization not called"); delete AddrLabelSymbols; @@ -472,7 +475,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { (LPMap && (*LPMap)[BeginLabel] != 0)) && (EndLabel->isDefined() || (LPMap && (*LPMap)[EndLabel] != 0))) continue; - + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); --j, --e; @@ -562,20 +565,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { // in the zero index. return 0; } - -namespace { - /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map - /// by source location, to avoid depending on the arbitrary order that - /// instruction selection visits variables in. 
- struct VariableDebugSorter { - bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A, - const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B) - const { - if (A.second.second.getLine() != B.second.second.getLine()) - return A.second.second.getLine() < B.second.second.getLine(); - if (A.second.second.getCol() != B.second.second.getCol()) - return A.second.second.getCol() < B.second.second.getCol(); - return false; - } - }; -} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 5d852f26beda..b3fb33736ffc 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -30,8 +30,9 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { MachineRegisterInfo::~MachineRegisterInfo() { #ifndef NDEBUG - for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) - assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?"); + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 && + "Vreg use list non-empty still?"); for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i) assert(!PhysRegUseDefLists[i] && "PhysRegUseDefLists has entries after all instructions are deleted"); @@ -44,20 +45,32 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { - unsigned VR = Reg; - Reg -= TargetRegisterInfo::FirstVirtualRegister; - assert(Reg < VRegInfo.size() && "Invalid vreg!"); const TargetRegisterClass *OldRC = VRegInfo[Reg].first; VRegInfo[Reg].first = RC; // Remove from old register class's vregs list. This may be slow but // fortunately this operation is rarely needed. std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); + std::vector<unsigned>::iterator I = + std::find(VRegs.begin(), VRegs.end(), Reg); VRegs.erase(I); // Add to new register class's vregs list. - RegClass2VRegMap[RC->getID()].push_back(VR); + RegClass2VRegMap[RC->getID()].push_back(Reg); +} + +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC) { + const TargetRegisterClass *OldRC = getRegClass(Reg); + if (OldRC == RC) + return RC; + const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC); + if (!NewRC) + return 0; + if (NewRC != OldRC) + setRegClass(Reg, NewRC); + return NewRC; } /// createVirtualRegister - Create and return a new virtual register in the @@ -66,17 +79,22 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { unsigned MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ assert(RegClass && "Cannot create register without RegClass!"); + + // New virtual register number. + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + // Add a reg, but keep track of whether the vector reallocated or not. - void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; - VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); - RegAllocHints.push_back(std::make_pair(0, 0)); + const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); + void *ArrayBase = getNumVirtRegs() == 0 ? 
0 : &VRegInfo[FirstVirtReg]; + VRegInfo.grow(Reg); + VRegInfo[Reg].first = RegClass; + RegAllocHints.grow(Reg); - if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) + if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) // The vector reallocated, handle this now. HandleVRegListReallocation(); - unsigned VR = getLastVirtReg(); - RegClass2VRegMap[RegClass->getID()].push_back(VR); - return VR; + RegClass2VRegMap[RegClass->getID()].push_back(Reg); + return Reg; } /// HandleVRegListReallocation - We just added a virtual register to the @@ -85,11 +103,12 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ void MachineRegisterInfo::HandleVRegListReallocation() { // The back pointers for the vreg lists point into the previous vector. // Update them to point to their correct slots. - for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) { - MachineOperand *List = VRegInfo[i].second; + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + MachineOperand *List = VRegInfo[Reg].second; if (!List) continue; // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[i].second; + List->Contents.Reg.Prev = &VRegInfo[Reg].second; } } @@ -112,8 +131,6 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { - assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && - "Invalid vreg!"); // Since we are in SSA form, we can use the first definition. if (!def_empty(Reg)) return &*def_begin(Reg); @@ -193,8 +210,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LiveIns.erase(LiveIns.begin() + i); --i; --e; } else { + DebugLoc DL; + // If there is a location for this live in then use it. + DenseMap<unsigned, DebugLoc>::iterator DLI = + LiveInLocs.find(LiveIns[i].second); + if (DLI != LiveInLocs.end()) + DL = DLI->second; + // Emit a copy. 
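// Two standalone sketches for the MachineRegisterInfo changes above; every
// identifier is illustrative and not the LLVM API itself.
//
// (1) constrainRegClass narrows a virtual register's class to one compatible
// with a new requirement. Modelling a register class as a set of registers,
// the "common subclass" behaves like an intersection, and the constraint
// fails when that intersection is empty.
#include <algorithm>
#include <iterator>
#include <set>

typedef std::set<unsigned> RegClassSet;

static RegClassSet constrain(const RegClassSet &Current,
                             const RegClassSet &Required) {
  RegClassSet Common;
  std::set_intersection(Current.begin(), Current.end(),
                        Required.begin(), Required.end(),
                        std::inserter(Common, Common.begin()));
  return Common;            // empty: the register cannot satisfy both classes
}

// (2) Virtual registers are now addressed via index2VirtReg/getNumVirtRegs
// instead of "FirstVirtualRegister + i" arithmetic. One plausible encoding,
// assumed here purely for illustration, tags the index with the top bit so a
// virtual register number can never collide with a small physical register.
static unsigned index2VirtReg(unsigned Index)   { return Index | (1u << 31); }
static unsigned virtReg2Index(unsigned Reg)     { return Reg & ~(1u << 31); }
static bool     isVirtualRegister(unsigned Reg) { return (Reg & (1u << 31)) != 0; }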
- BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), + BuildMI(*EntryMBB, EntryMBB->begin(), DL, TII.get(TargetOpcode::COPY), LiveIns[i].second) .addReg(LiveIns[i].first); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c8f8fafe227e..8a93a24287b6 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -34,27 +35,31 @@ using namespace llvm; static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), - cl::init(false), cl::Hidden); -static cl::opt<unsigned> -SplitLimit("split-limit", - cl::init(~0u), cl::Hidden); + cl::init(true), cl::Hidden); -STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumCoalesces, "Number of copies coalesced"); namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineRegisterInfo *RegInfo; // Machine register information + MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachineLoopInfo *LI; AliasAnalysis *AA; BitVector AllocatableSet; // Which physregs are allocatable? + // Remember which edges have been considered for breaking. + SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> + CEBCandidates; + public: static char ID; // Pass identification - MachineSinking() : MachineFunctionPass(ID) {} + MachineSinking() : MachineFunctionPass(ID) { + initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -67,43 +72,125 @@ namespace { AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); } + + virtual void releaseMemory() { + CEBCandidates.clear(); + } + private: bool ProcessBlock(MachineBasicBlock &MBB); - MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From, - MachineBasicBlock *To); + bool isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To); + MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); bool SinkInstruction(MachineInstr *MI, bool &SawStore); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, bool &LocalUse) const; + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, bool &LocalUse) const; + bool PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; -INITIALIZE_PASS(MachineSinking, "machine-sink", - "Machine code sinking", false, false); +INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", + "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineSinking, "machine-sink", + "Machine code sinking", false, false) FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } +bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, + 
MachineBasicBlock *MBB) { + if (!MI->isCopy()) + return false; + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg) || + !MRI->hasOneNonDBGUse(SrcReg)) + return false; + + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); + if (SRC != DRC) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isCopyLike()) + return false; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MRI->replaceRegWith(DstReg, SrcReg); + MI->eraseFromParent(); + ++NumCoalesces; + return true; +} + /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. If any use is in the /// definition block, then return false since it is never legal to move def /// after uses. -bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, - MachineBasicBlock *MBB, - MachineBasicBlock *DefMBB, - bool &LocalUse) const { +bool +MachineSinking::AllUsesDominatedByBlock(unsigned Reg, + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, + bool &LocalUse) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); + + if (MRI->use_nodbg_empty(Reg)) + return true; + // Ignoring debug uses is necessary so debug info doesn't affect the code. // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. + + // BreakPHIEdge is true if all the uses are in the successor MBB being sunken + // into and they are all PHI nodes. In this case, machine-sink must break + // the critical edge first. e.g. + // + // BB#1: derived from LLVM BB %bb4.preheader + // Predecessors according to CFG: BB#0 + // ... + // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead> + // ... + // JE_4 <BB#37>, %EFLAGS<imp-use> + // Successors according to CFG: BB#37 BB#2 + // + // BB#2: derived from LLVM BB %bb.nph + // Predecessors according to CFG: BB#0 BB#1 + // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> + BreakPHIEdge = true; for (MachineRegisterInfo::use_nodbg_iterator - I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; ++I) { - // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); + if (!(UseBlock == MBB && UseInst->isPHI() && + UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) { + BreakPHIEdge = false; + break; + } + } + if (BreakPHIEdge) + return true; + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); + I != E; ++I) { + // Determine the block of the use. + MachineInstr *UseInst = &*I; + MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. 
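// Hedged, toy model of the trivial forward coalescing added above: when a
// copy's source virtual register has exactly one (non-debug) use and the
// register classes match, later mentions of the destination can simply be
// renamed to the source and the copy deleted. Real machine IR is far richer;
// the string-based "instruction" below exists only for illustration.
#include <cstddef>
#include <string>
#include <vector>

struct InstModel { std::string Op; std::vector<std::string> Regs; };

static void coalesceCopy(std::vector<InstModel> &Code, std::size_t CopyIdx) {
  const std::string Dst = Code[CopyIdx].Regs[0];
  const std::string Src = Code[CopyIdx].Regs[1];
  for (std::size_t i = CopyIdx + 1; i != Code.size(); ++i)
    for (std::size_t r = 0; r != Code[i].Regs.size(); ++r)
      if (Code[i].Regs[r] == Dst)
        Code[i].Regs[r] = Src;                 // rename uses of dst to src
  Code.erase(Code.begin() + CopyIdx);          // the copy itself is now dead
}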
@@ -127,7 +214,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); - RegInfo = &MF.getRegInfo(); + MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); AA = &getAnalysis<AliasAnalysis>(); @@ -139,6 +226,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; // Process all basic blocks. + CEBCandidates.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); @@ -177,6 +265,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MI->isDebugValue()) continue; + if (PerformTrivialForwardCoalescing(MI, &MBB)) + continue; + if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; @@ -186,51 +277,92 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { return MadeChange; } -MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB) { +bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To) { + // FIXME: Need much better heuristics. + + // If the pass has already considered breaking this edge (during this pass + // through the function), then let's go ahead and break it. This means + // sinking multiple "cheap" instructions into the same block. + if (!CEBCandidates.insert(std::make_pair(From, To))) + return true; + + if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove()) + return true; + + // MI is cheap, we probably don't want to break the critical edge for it. + // However, if this would allow some definitions of its source operands + // to be sunk then it's probably worth it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MRI->hasOneNonDBGUse(Reg)) + return true; + } + + return false; +} + +MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) + return 0; + // Avoid breaking back edge. From == To means backedge for single BB loop. - if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB) + if (!SplitEdges || FromBB == ToBB) + return 0; + + // Check for backedges of more "complex" loops. + if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && + LI->isLoopHeader(ToBB)) return 0; - // Check for more "complex" loops. - if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) || - !LI->isLoopHeader(ToBB)) { - // It's not always legal to break critical edges and sink the computation - // to the edge. - // - // BB#1: - // v1024 - // Beq BB#3 - // <fallthrough> - // BB#2: - // ... no uses of v1024 - // <fallthrough> - // BB#3: - // ... - // = v1024 - // - // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: - // - // BB#1: - // ... - // Bne BB#2 - // BB#4: - // v1024 = - // B BB#3 - // BB#2: - // ... no uses of v1024 - // <fallthrough> - // BB#3: - // ... - // = v1024 - // - // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 - // flow. We need to ensure the new basic block where the computation is - // sunk to dominates all the uses. 
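// Minimal sketch of the "critical edge" notion the comments above rely on: an
// edge From -> To is critical when From has several successors and To has
// several predecessors, so nothing can be placed on the edge without creating
// a new block. The three-block CFG below is invented for illustration.
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<int> > succ(3);       // succ[b] = successors of BB#b
  succ[0].push_back(1); succ[0].push_back(2);   // BB#0 branches to BB#1 or BB#2
  succ[1].push_back(2);                         // BB#1 falls through to BB#2

  std::vector<int> npreds(3, 0);
  for (unsigned b = 0; b != succ.size(); ++b)
    for (unsigned i = 0; i != succ[b].size(); ++i)
      ++npreds[succ[b][i]];

  for (unsigned b = 0; b != succ.size(); ++b)
    for (unsigned i = 0; i != succ[b].size(); ++i) {
      int To = succ[b][i];
      if (succ[b].size() > 1 && npreds[To] > 1)
        std::printf("BB#%u -> BB#%d is a critical edge\n", b, To);
    }
  return 0;
}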
- // It's only legal to break critical edge and sink the computation to the - // new block if all the predecessors of "To", except for "From", are - // not dominated by "From". Given SSA property, this means these - // predecessors are dominated by "To". + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. + // It's only legal to break critical edge and sink the computation to the + // new block if all the predecessors of "To", except for "From", are + // not dominated by "From". Given SSA property, this means these + // predecessors are dominated by "To". + // + // There is no need to do this check if all the uses are PHI nodes. PHI + // sources are only defined on the specific predecessor edges. + if (!BreakPHIEdge) { for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(), E = ToBB->pred_end(); PI != E; ++PI) { if (*PI == FromBB) @@ -238,17 +370,23 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB, if (!DT->dominates(ToBB, *PI)) return 0; } - - // FIXME: Determine if it's cost effective to break this edge. - return FromBB->SplitCriticalEdge(ToBB, this); } - return 0; + return FromBB->SplitCriticalEdge(ToBB, this); +} + +static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { + return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence(); } /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) + return false; + // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; @@ -269,6 +407,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // decide. MachineBasicBlock *SuccToSinkTo = 0; + bool BreakPHIEdge = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -281,7 +420,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!RegInfo->def_empty(Reg)) + if (!MRI->def_empty(Reg)) return false; if (AllocatableSet.test(Reg)) @@ -290,7 +429,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check for a def among the register's aliases too. 
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; - if (!RegInfo->def_empty(AliasReg)) + if (!MRI->def_empty(AliasReg)) return false; if (AllocatableSet.test(AliasReg)) @@ -305,7 +444,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MO.isUse()) continue; // If it's not safe to move defs of the register class, then abort. - if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) + if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return false; // FIXME: This picks a successor to sink into based on having one @@ -327,7 +466,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse)) + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, + BreakPHIEdge, LocalUse)) return false; continue; @@ -338,7 +478,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), E = ParentBlock->succ_end(); SI != E; ++SI) { bool LocalUse = false; - if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) { + if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, + BreakPHIEdge, LocalUse)) { SuccToSinkTo = *SI; break; } @@ -384,7 +525,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. - // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. @@ -412,10 +552,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { - MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo); + MachineBasicBlock *NewSucc = + SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!NewSucc) { - DEBUG(dbgs() << - " *** PUNTING: Not legal or profitable to break critical edge\n"); + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); return false; } else { DEBUG(dbgs() << " *** Splitting critical edge:" @@ -424,10 +565,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); SuccToSinkTo = NewSucc; ++NumSplit; + BreakPHIEdge = false; } } } + if (BreakPHIEdge) { + // BreakPHIEdge is true if all the uses are in the successor MBB being + // sunken into and they are all PHI nodes. In this case, machine-sink must + // break the critical edge first. + MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!NewSucc) { + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); + return false; + } + + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << ParentBlock->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); + SuccToSinkTo = NewSucc; + ++NumSplit; + } + // Determine where to insert into. Skip phi nodes. 
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 1e88562935ea..7351119f4728 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -26,6 +26,7 @@ #include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -45,14 +46,16 @@ using namespace llvm; namespace { struct MachineVerifier { - MachineVerifier(Pass *pass) : + MachineVerifier(Pass *pass, const char *b) : PASS(pass), + Banner(b), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) {} bool runOnMachineFunction(MachineFunction &MF); Pass *const PASS; + const char *Banner; const char *const OutFileName; raw_ostream *OS; const MachineFunction *MF; @@ -71,6 +74,8 @@ namespace { RegVector regsDefined, regsDead, regsKilled; RegSet regsLiveInButUnused; + SlotIndex lastIndex; + // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); @@ -167,7 +172,9 @@ namespace { // Analysis information if available LiveVariables *LiveVars; - const LiveIntervals *LiveInts; + LiveIntervals *LiveInts; + LiveStacks *LiveStks; + SlotIndexes *Indexes; void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); @@ -193,9 +200,12 @@ namespace { struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid + const char *const Banner; - MachineVerifierPass() - : MachineFunctionPass(ID) {} + MachineVerifierPass(const char *b = 0) + : MachineFunctionPass(ID), Banner(b) { + initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); + } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -203,7 +213,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) { - MF.verify(this); + MF.verify(this, Banner); return false; } }; @@ -212,14 +222,15 @@ namespace { char MachineVerifierPass::ID = 0; INITIALIZE_PASS(MachineVerifierPass, "machineverifier", - "Verify generated machine code", false, false); + "Verify generated machine code", false, false) -FunctionPass *llvm::createMachineVerifierPass() { - return new MachineVerifierPass(); +FunctionPass *llvm::createMachineVerifierPass(const char *Banner) { + return new MachineVerifierPass(Banner); } -void MachineFunction::verify(Pass *p) const { - MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this)); +void MachineFunction::verify(Pass *p, const char *Banner) const { + MachineVerifier(p, Banner) + .runOnMachineFunction(const_cast<MachineFunction&>(*this)); } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { @@ -247,11 +258,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { LiveVars = NULL; LiveInts = NULL; + LiveStks = NULL; + Indexes = NULL; if (PASS) { LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); // We don't want to verify LiveVariables if LiveIntervals is available. 
if (!LiveInts) LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); + Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); } visitMachineFunctionBefore(); @@ -260,6 +275,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBasicBlockBefore(MFI); for (MachineBasicBlock::const_iterator MBBI = MFI->begin(), MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + if (MBBI->getParent() != MFI) { + report("Bad instruction parent pointer", MFI); + *OS << "Instruction: " << *MBBI; + continue; + } visitMachineInstrBefore(MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); @@ -288,8 +308,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); *OS << '\n'; - if (!foundErrors++) - MF->print(*OS); + if (!foundErrors++) { + if (Banner) + *OS << "# " << Banner << '\n'; + MF->print(*OS, Indexes); + } *OS << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getFunction()->getNameStr() << "\n"; } @@ -299,13 +322,19 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { report(msg, MBB->getParent()); *OS << "- basic block: " << MBB->getName() << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")\n"; + << " (BB#" << MBB->getNumber() << ")"; + if (Indexes) + *OS << " [" << Indexes->getMBBStartIdx(MBB) + << ';' << Indexes->getMBBEndIdx(MBB) << ')'; + *OS << '\n'; } void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); *OS << "- instruction: "; + if (Indexes && Indexes->hasIndex(MI)) + *OS << Indexes->getInstructionIndex(MI) << '\t'; MI->print(*OS, TM); } @@ -329,6 +358,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { } void MachineVerifier::visitMachineFunctionBefore() { + lastIndex = SlotIndex(); regsReserved = TRI->getReservedRegs(*MF); // A sub-register of a reserved register is also reserved @@ -357,6 +387,16 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + // Count the number of landing pad successors. + SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if ((*I)->isLandingPad()) + LandingPadSuccs.insert(*I); + } + if (LandingPadSuccs.size() > 1) + report("MBB has more than one landing pad successor", MBB); + // Call AnalyzeBranch. If it succeeds, there several more conditions to check. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; @@ -372,14 +412,14 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actually fall // out the bottom of the function. - } else if (MBB->succ_empty()) { + } else if (MBB->succ_size() == LandingPadSuccs.size()) { // It's possible that the block legitimately ends with a noreturn // call or an unreachable, in which case it won't actuall fall // out of the block. 
- } else if (MBB->succ_size() != 1) { + } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); - } else if (MBB->succ_begin()[0] != MBBI) { + } else if (!MBB->isSuccessor(MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } @@ -394,10 +434,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. - if (MBB->succ_size() != 1) { + if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); - } else if (MBB->succ_begin()[0] != TBB) { + } else if (!MBB->isSuccessor(TBB)) { report("MBB exits via unconditional branch but the CFG " "successor doesn't match the actual successor!", MBB); } @@ -487,6 +527,9 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsKilled.clear(); regsDefined.clear(); + + if (Indexes) + lastIndex = Indexes->getMBBStartIdx(MBB); } void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { @@ -525,6 +568,7 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const TargetInstrDesc &TI = MI->getDesc(); + const TargetOperandInfo &TOI = TI.OpInfo[MONum]; // The first TI.NumDefs operands must be explicit register defines if (MONum < TI.getNumDefs()) { @@ -535,9 +579,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); } else if (MONum < TI.getNumOperands()) { - if (MO->isReg()) { - if (MO->isDef()) - report("Explicit operand marked as def", MO, MONum); + // Don't check if it's the last operand in a variadic instruction. See, + // e.g., LDM_RET in the arm back end. + if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) { + if (MO->isDef() && !TOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } @@ -554,7 +600,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { return; // Check Live Variables. - if (MO->isUndef()) { + if (MI->isDebugValue()) { + // Liveness checks are not valid for debug values. + } else if (MO->isUndef()) { // An <undef> doesn't refer to any register, so just skip it. } else if (MO->isUse()) { regsLiveInButUnused.erase(Reg); @@ -566,7 +614,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { unsigned DefReg = MI->getOperand(defIdx).getReg(); if (Reg == DefReg) { isKill = true; - // ANd in that case an explicit kill flag is not allowed. + // And in that case an explicit kill flag is not allowed. if (MO->isKill()) report("Illegal kill flag on two-address instruction operand", MO, MONum); @@ -590,7 +638,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. 
- if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); @@ -598,8 +647,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("No live range at use", MO, MONum); *OS << UseIdx << " is not live in " << LI << '\n'; } - // TODO: Verify isKill == LI.killedAt. - } else if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { report("Virtual register has no Live interval", MO, MONum); } } @@ -636,11 +690,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) { - assert(LR->valno && "NULL valno is not allowed"); - if (LR->valno->def != DefIdx) { + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << LR->valno->id << " is not defined at " + *OS << "Valno " << VNI->id << " is not defined at " << DefIdx << " in " << LI << '\n'; } } else { @@ -655,7 +709,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check register classes. 
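[Editorial aside] The liveness checks above and below rely on SlotIndex's per-instruction sub-slots (LOAD, USE, DEF and STORE in this era's SlotIndexes.h): getUseIndex()/getDefIndex() select the point within the same instruction at which uses are read and defs become live, and isUse()/isDef() test which sub-slot an index names, which is why early-clobber defs are expected at a USE slot further down. A compressed, hedged restatement of the invariant verified for a virtual-register use, with names as in the surrounding hunk:

  SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
  const LiveInterval &LI = LiveInts->getInterval(Reg);
  // 1) Reg must have a live range covering UseIdx, and
  // 2) a kill flag additionally promises the range ends at this instruction:
  //      MO->isKill()  implies  LI.killedAt(UseIdx.getDefIndex())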
if (MONum < TI.getNumOperands() && !MO->isImplicit()) { - const TargetOperandInfo &TOI = TI.OpInfo[MONum]; unsigned SubIdx = MO->getSubReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -706,6 +759,22 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("PHI operand is not in the CFG", MO, MONum); break; + case MachineOperand::MO_FrameIndex: + if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + SlotIndex Idx = LiveInts->getInstructionIndex(MI); + if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + report("Instruction loads from dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + report("Instruction stores to dead spill slot", MO, MONum); + *OS << "Live stack: " << LI << '\n'; + } + } + break; + default: break; } @@ -717,12 +786,31 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { set_subtract(regsLive, regsKilled); regsKilled.clear(); set_subtract(regsLive, regsDead); regsDead.clear(); set_union(regsLive, regsDefined); regsDefined.clear(); + + if (Indexes && Indexes->hasIndex(MI)) { + SlotIndex idx = Indexes->getInstructionIndex(MI); + if (!(idx > lastIndex)) { + report("Instruction index out of order", MI); + *OS << "Last instruction was at " << lastIndex << '\n'; + } + lastIndex = idx; + } } void MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { MBBInfoMap[MBB].regsLiveOut = regsLive; regsLive.clear(); + + if (Indexes) { + SlotIndex stop = Indexes->getMBBEndIdx(MBB); + if (!(stop > lastIndex)) { + report("Block ends before last instruction index", MBB); + *OS << "Block ends at " << stop + << " last instruction was at " << lastIndex << '\n'; + } + lastIndex = stop; + } } // Calculate the largest possible vregsPassed sets. These are the registers that @@ -854,8 +942,8 @@ void MachineVerifier::visitMachineFunctionAfter() { void MachineVerifier::verifyLiveVariables() { assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); - for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, - RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { @@ -865,13 +953,13 @@ void MachineVerifier::verifyLiveVariables() { if (MInfo.vregsRequired.count(Reg)) { if (!VI.AliveBlocks.test(MFI->getNumber())) { report("LiveVariables: Block missing from AliveBlocks", MFI); - *OS << "Virtual register %reg" << Reg + *OS << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { if (VI.AliveBlocks.test(MFI->getNumber())) { report("LiveVariables: Block should not be in AliveBlocks", MFI); - *OS << "Virtual register %reg" << Reg + *OS << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } } @@ -884,14 +972,24 @@ void MachineVerifier::verifyLiveIntervals() { for (LiveIntervals::const_iterator LVI = LiveInts->begin(), LVE = LiveInts->end(); LVI != LVE; ++LVI) { const LiveInterval &LI = *LVI->second; + + // Spilling and splitting may leave unused registers around. Skip them. 
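[Editorial aside] The new MO_FrameIndex case cross-checks memory operands against LiveStacks: an instruction that may load must reference a spill slot whose stack interval is live at the instruction's use slot, and one that may store must reference a slot live at its def slot. Stripped of the diff markers, the added check amounts to:

  int FI = MO->getIndex();                         // stack (spill) slot operand
  if (LiveStks->hasInterval(FI) && !LiveInts->isNotInMIMap(MI)) {
    const LiveInterval &SI = LiveStks->getInterval(FI);
    SlotIndex Idx = LiveInts->getInstructionIndex(MI);
    bool loadOK  = SI.liveAt(Idx.getUseIndex());   // required when TI.mayLoad()
    bool storeOK = SI.liveAt(Idx.getDefIndex());   // required when TI.mayStore()
  }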
+ if (MRI->use_empty(LI.reg)) + continue; + + // Physical registers have much weirdness going on, mostly from coalescing. + // We should probably fix it, but for now just ignore them. + if (TargetRegisterInfo::isPhysicalRegister(LI.reg)) + continue; + assert(LVI->first == LI.reg && "Invalid reg to interval mapping"); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I!=E; ++I) { VNInfo *VNI = *I; - const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def); + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (!DefLR) { + if (!DefVNI) { if (!VNI->isUnused()) { report("Valno not live at def and not marked unused", MF); *OS << "Valno #" << VNI->id << " in " << LI << '\n'; @@ -902,31 +1000,216 @@ void MachineVerifier::verifyLiveIntervals() { if (VNI->isUnused()) continue; - if (DefLR->valno != VNI) { + if (DefVNI != VNI) { report("Live range at def has different valno", MF); - DefLR->print(*OS); - *OS << " should use valno #" << VNI->id << " in " << LI << '\n'; + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; + continue; } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + continue; + } + + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() + << " in " << LI << '\n'; + } + } else { + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } else if (!MI->modifiesRegister(LI.reg, TRI)) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } + + bool isEarlyClobber = false; + if (MI) { + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && + MOI->isEarlyClobber()) { + isEarlyClobber = true; + break; + } + } + } + + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
+ if (isEarlyClobber) { + if (!VNI->def.isUse()) { + report("Early clobber def must be at a USE slot", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } + } else if (!VNI->def.isDef()) { + report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + } + } } for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const LiveRange &LR = *I; - assert(LR.valno && "Live range has no valno"); + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); - if (LR.valno->id >= LI.getNumValNums() || - LR.valno != LI.getValNumInfo(LR.valno->id)) { + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { report("Foreign valno in live range", MF); - LR.print(*OS); + I->print(*OS); *OS << " has a valno not in " << LI << '\n'; } - if (LR.valno->isUnused()) { + if (VNI->isUnused()) { report("Live range valno is marked unused", MF); - LR.print(*OS); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF); + I->print(*OS); *OS << " in " << LI << '\n'; + continue; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB); + I->print(*OS); + *OS << " in " << LI << '\n' << "Basic block starts at " + << MBBStartIdx << '\n'; + } + + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF); + I->print(*OS); + *OS << " in " << LI << '\n'; + continue; + } + if (I->end != LiveInts->getMBBEndIdx(EndMBB)) { + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); + I->print(*OS); + *OS << " in " << LI << '\n' << "Basic block starts at " + << MBBStartIdx << '\n'; + } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) && + !MI->readsVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + // FIXME: Should we check for each of these? + bool hasDeadDef = false; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) { + hasDeadDef = true; + break; + } + } + + if (!hasDeadDef) { + report("Instruction killing live segment neither defines nor reads " + "register", MI); + I->print(*OS); + *OS << " in " << LI << '\n'; + } + } + } + + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + continue; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } + // Check that VNI is live-out of all predecessors. 
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); + const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + + if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) { + if (PVNI && !PVNI->hasPHIKill()) { + report("Value live out of predecessor doesn't have PHIKill", MF); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << " doesn't have PHIKill, but Valno #" << VNI->id + << " is PHIDef and defined at the beginning of BB#" + << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) + << " in " << LI << '\n'; + } + continue; + } + + if (!PVNI) { + report("Register not marked live out of predecessor", *PI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << PEnd << " in " << LI << '\n'; + continue; + } + + if (PVNI != VNI) { + report("Different value live out of predecessor", *PI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; + } + } + if (&*MFI == EndMBB) + break; + ++MFI; } + } + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF); + *OS << NumComp << " components in " << LI << '\n'; + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; + } + } } } } diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index edb4eea71b8a..c05be130ec61 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -33,7 +33,9 @@ namespace { public: static char ID; // Pass identification - OptimizePHIs() : MachineFunctionPass(ID) {} + OptimizePHIs() : MachineFunctionPass(ID) { + initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -55,7 +57,7 @@ namespace { char OptimizePHIs::ID = 0; INITIALIZE_PASS(OptimizePHIs, "opt-phis", - "Optimize machine instruction PHIs", false, false); + "Optimize machine instruction PHIs", false, false) FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h deleted file mode 100644 index b2224cb051dc..000000000000 --- a/lib/CodeGen/PBQP/Graph.h +++ /dev/null @@ -1,425 +0,0 @@ -//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PBQP Graph class. -// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_CODEGEN_PBQP_GRAPH_H -#define LLVM_CODEGEN_PBQP_GRAPH_H - -#include "Math.h" - -#include <list> -#include <vector> -#include <map> - -namespace PBQP { - - /// PBQP Graph class. 
- /// Instances of this class describe PBQP problems. - class Graph { - private: - - // ----- TYPEDEFS ----- - class NodeEntry; - class EdgeEntry; - - typedef std::list<NodeEntry> NodeList; - typedef std::list<EdgeEntry> EdgeList; - - public: - - typedef NodeList::iterator NodeItr; - typedef NodeList::const_iterator ConstNodeItr; - - typedef EdgeList::iterator EdgeItr; - typedef EdgeList::const_iterator ConstEdgeItr; - - private: - - typedef std::list<EdgeItr> AdjEdgeList; - - public: - - typedef AdjEdgeList::iterator AdjEdgeItr; - - private: - - class NodeEntry { - private: - Vector costs; - AdjEdgeList adjEdges; - unsigned degree; - void *data; - public: - NodeEntry(const Vector &costs) : costs(costs), degree(0) {} - Vector& getCosts() { return costs; } - const Vector& getCosts() const { return costs; } - unsigned getDegree() const { return degree; } - AdjEdgeItr edgesBegin() { return adjEdges.begin(); } - AdjEdgeItr edgesEnd() { return adjEdges.end(); } - AdjEdgeItr addEdge(EdgeItr e) { - ++degree; - return adjEdges.insert(adjEdges.end(), e); - } - void removeEdge(AdjEdgeItr ae) { - --degree; - adjEdges.erase(ae); - } - void setData(void *data) { this->data = data; } - void* getData() { return data; } - }; - - class EdgeEntry { - private: - NodeItr node1, node2; - Matrix costs; - AdjEdgeItr node1AEItr, node2AEItr; - void *data; - public: - EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs) - : node1(node1), node2(node2), costs(costs) {} - NodeItr getNode1() const { return node1; } - NodeItr getNode2() const { return node2; } - Matrix& getCosts() { return costs; } - const Matrix& getCosts() const { return costs; } - void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; } - AdjEdgeItr getNode1AEItr() { return node1AEItr; } - void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; } - AdjEdgeItr getNode2AEItr() { return node2AEItr; } - void setData(void *data) { this->data = data; } - void *getData() { return data; } - }; - - // ----- MEMBERS ----- - - NodeList nodes; - unsigned numNodes; - - EdgeList edges; - unsigned numEdges; - - // ----- INTERNAL METHODS ----- - - NodeEntry& getNode(NodeItr nItr) { return *nItr; } - const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; } - - EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; } - const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; } - - NodeItr addConstructedNode(const NodeEntry &n) { - ++numNodes; - return nodes.insert(nodes.end(), n); - } - - EdgeItr addConstructedEdge(const EdgeEntry &e) { - assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() && - "Attempt to add duplicate edge."); - ++numEdges; - EdgeItr edgeItr = edges.insert(edges.end(), e); - EdgeEntry &ne = getEdge(edgeItr); - NodeEntry &n1 = getNode(ne.getNode1()); - NodeEntry &n2 = getNode(ne.getNode2()); - // Sanity check on matrix dimensions: - assert((n1.getCosts().getLength() == ne.getCosts().getRows()) && - (n2.getCosts().getLength() == ne.getCosts().getCols()) && - "Edge cost dimensions do not match node costs dimensions."); - ne.setNode1AEItr(n1.addEdge(edgeItr)); - ne.setNode2AEItr(n2.addEdge(edgeItr)); - return edgeItr; - } - - inline void copyFrom(const Graph &other); - public: - - /// \brief Construct an empty PBQP graph. - Graph() : numNodes(0), numEdges(0) {} - - /// \brief Copy construct this graph from "other". Note: Does not copy node - /// and edge data, only graph structure and costs. - /// @param other Source graph to copy from. 
- Graph(const Graph &other) : numNodes(0), numEdges(0) { - copyFrom(other); - } - - /// \brief Make this graph a copy of "other". Note: Does not copy node and - /// edge data, only graph structure and costs. - /// @param other The graph to copy from. - /// @return A reference to this graph. - /// - /// This will clear the current graph, erasing any nodes and edges added, - /// before copying from other. - Graph& operator=(const Graph &other) { - clear(); - copyFrom(other); - return *this; - } - - /// \brief Add a node with the given costs. - /// @param costs Cost vector for the new node. - /// @return Node iterator for the added node. - NodeItr addNode(const Vector &costs) { - return addConstructedNode(NodeEntry(costs)); - } - - /// \brief Add an edge between the given nodes with the given costs. - /// @param n1Itr First node. - /// @param n2Itr Second node. - /// @return Edge iterator for the added edge. - EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, - const Matrix &costs) { - assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && - getNodeCosts(n2Itr).getLength() == costs.getCols() && - "Matrix dimensions mismatch."); - return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); - } - - /// \brief Get the number of nodes in the graph. - /// @return Number of nodes in the graph. - unsigned getNumNodes() const { return numNodes; } - - /// \brief Get the number of edges in the graph. - /// @return Number of edges in the graph. - unsigned getNumEdges() const { return numEdges; } - - /// \brief Get a node's cost vector. - /// @param nItr Node iterator. - /// @return Node cost vector. - Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } - - /// \brief Get a node's cost vector (const version). - /// @param nItr Node iterator. - /// @return Node cost vector. - const Vector& getNodeCosts(ConstNodeItr nItr) const { - return getNode(nItr).getCosts(); - } - - /// \brief Set a node's data pointer. - /// @param nItr Node iterator. - /// @param data Pointer to node data. - /// - /// Typically used by a PBQP solver to attach data to aid in solution. - void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } - - /// \brief Get the node's data pointer. - /// @param nItr Node iterator. - /// @return Pointer to node data. - void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } - - /// \brief Get an edge's cost matrix. - /// @param eItr Edge iterator. - /// @return Edge cost matrix. - Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } - - /// \brief Get an edge's cost matrix (const version). - /// @param eItr Edge iterator. - /// @return Edge cost matrix. - const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { - return getEdge(eItr).getCosts(); - } - - /// \brief Set an edge's data pointer. - /// @param eItr Edge iterator. - /// @param data Pointer to edge data. - /// - /// Typically used by a PBQP solver to attach data to aid in solution. - void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } - - /// \brief Get an edge's data pointer. - /// @param eItr Edge iterator. - /// @return Pointer to edge data. - void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } - - /// \brief Get a node's degree. - /// @param nItr Node iterator. - /// @return The degree of the node. - unsigned getNodeDegree(NodeItr nItr) const { - return getNode(nItr).getDegree(); - } - - /// \brief Begin iterator for node set. 
- NodeItr nodesBegin() { return nodes.begin(); } - - /// \brief Begin const iterator for node set. - ConstNodeItr nodesBegin() const { return nodes.begin(); } - - /// \brief End iterator for node set. - NodeItr nodesEnd() { return nodes.end(); } - - /// \brief End const iterator for node set. - ConstNodeItr nodesEnd() const { return nodes.end(); } - - /// \brief Begin iterator for edge set. - EdgeItr edgesBegin() { return edges.begin(); } - - /// \brief End iterator for edge set. - EdgeItr edgesEnd() { return edges.end(); } - - /// \brief Get begin iterator for adjacent edge set. - /// @param nItr Node iterator. - /// @return Begin iterator for the set of edges connected to the given node. - AdjEdgeItr adjEdgesBegin(NodeItr nItr) { - return getNode(nItr).edgesBegin(); - } - - /// \brief Get end iterator for adjacent edge set. - /// @param nItr Node iterator. - /// @return End iterator for the set of edges connected to the given node. - AdjEdgeItr adjEdgesEnd(NodeItr nItr) { - return getNode(nItr).edgesEnd(); - } - - /// \brief Get the first node connected to this edge. - /// @param eItr Edge iterator. - /// @return The first node connected to the given edge. - NodeItr getEdgeNode1(EdgeItr eItr) { - return getEdge(eItr).getNode1(); - } - - /// \brief Get the second node connected to this edge. - /// @param eItr Edge iterator. - /// @return The second node connected to the given edge. - NodeItr getEdgeNode2(EdgeItr eItr) { - return getEdge(eItr).getNode2(); - } - - /// \brief Get the "other" node connected to this edge. - /// @param eItr Edge iterator. - /// @param nItr Node iterator for the "given" node. - /// @return The iterator for the "other" node connected to this edge. - NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { - EdgeEntry &e = getEdge(eItr); - if (e.getNode1() == nItr) { - return e.getNode2(); - } // else - return e.getNode1(); - } - - /// \brief Get the edge connecting two nodes. - /// @param n1Itr First node iterator. - /// @param n2Itr Second node iterator. - /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, - /// otherwise returns edgesEnd(). - EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) { - for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr); - aeItr != aeEnd; ++aeItr) { - if ((getEdgeNode1(*aeItr) == n2Itr) || - (getEdgeNode2(*aeItr) == n2Itr)) { - return *aeItr; - } - } - return edges.end(); - } - - /// \brief Remove a node from the graph. - /// @param nItr Node iterator. - void removeNode(NodeItr nItr) { - NodeEntry &n = getNode(nItr); - for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) { - EdgeItr eItr = *itr; - ++itr; - removeEdge(eItr); - } - nodes.erase(nItr); - --numNodes; - } - - /// \brief Remove an edge from the graph. - /// @param eItr Edge iterator. - void removeEdge(EdgeItr eItr) { - EdgeEntry &e = getEdge(eItr); - NodeEntry &n1 = getNode(e.getNode1()); - NodeEntry &n2 = getNode(e.getNode2()); - n1.removeEdge(e.getNode1AEItr()); - n2.removeEdge(e.getNode2AEItr()); - edges.erase(eItr); - --numEdges; - } - - /// \brief Remove all nodes and edges from the graph. - void clear() { - nodes.clear(); - edges.clear(); - numNodes = numEdges = 0; - } - - /// \brief Print a representation of this graph in DOT format. - /// @param os Output stream to print on. 
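[Editorial aside] Since this commit only removes the PBQP headers from lib/CodeGen, a quick sketch of the Graph interface shown above may help when reading the rest of the deletion. The cost containers come from the companion Math.h; the constructors taking a size plus an initial value are assumed from that header, and the include path reflects the headers' expected new home under include/llvm/CodeGen/PBQP:

  #include "llvm/CodeGen/PBQP/Graph.h"
  #include <limits>

  void buildTinyProblem() {
    PBQP::Graph g;
    PBQP::Graph::NodeItr n1 = g.addNode(PBQP::Vector(2, 0));  // two options, zero cost
    PBQP::Graph::NodeItr n2 = g.addNode(PBQP::Vector(2, 0));
    PBQP::Matrix e(2, 2, 0);                                  // 2x2 edge cost matrix
    const PBQP::PBQPNum inf = std::numeric_limits<PBQP::PBQPNum>::infinity();
    e[0][0] = e[1][1] = inf;              // forbid both nodes picking the same option
    g.addEdge(n1, n2, e);                 // dimensions must match the node cost vectors
  }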
- template <typename OStream> - void printDot(OStream &os) { - - os << "graph {\n"; - - for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd(); - nodeItr != nodeEnd; ++nodeItr) { - - os << " node" << nodeItr << " [ label=\"" - << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n"; - } - - os << " edge [ len=" << getNumNodes() << " ]\n"; - - for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd(); - edgeItr != edgeEnd; ++edgeItr) { - - os << " node" << getEdgeNode1(edgeItr) - << " -- node" << getEdgeNode2(edgeItr) - << " [ label=\""; - - const Matrix &edgeCosts = getEdgeCosts(edgeItr); - - for (unsigned i = 0; i < edgeCosts.getRows(); ++i) { - os << edgeCosts.getRowAsVector(i) << "\\n"; - } - os << "\" ]\n"; - } - os << "}\n"; - } - - }; - - class NodeItrComparator { - public: - bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const { - return &*n1 < &*n2; - } - - bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const { - return &*n1 < &*n2; - } - }; - - class EdgeItrCompartor { - public: - bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const { - return &*e1 < &*e2; - } - - bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const { - return &*e1 < &*e2; - } - }; - - void Graph::copyFrom(const Graph &other) { - std::map<Graph::ConstNodeItr, Graph::NodeItr, - NodeItrComparator> nodeMap; - - for (Graph::ConstNodeItr nItr = other.nodesBegin(), - nEnd = other.nodesEnd(); - nItr != nEnd; ++nItr) { - nodeMap[nItr] = addNode(other.getNodeCosts(nItr)); - } - - } - -} - -#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h deleted file mode 100644 index 791c227f0d07..000000000000 --- a/lib/CodeGen/PBQP/HeuristicBase.h +++ /dev/null @@ -1,246 +0,0 @@ -//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H -#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H - -#include "HeuristicSolver.h" - -namespace PBQP { - - /// \brief Abstract base class for heuristic implementations. - /// - /// This class provides a handy base for heuristic implementations with common - /// solver behaviour implemented for a number of methods. - /// - /// To implement your own heuristic using this class as a base you'll have to - /// implement, as a minimum, the following methods: - /// <ul> - /// <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the - /// heuristic reduction list. - /// <li> void heuristicReduce() : Perform a single heuristic reduction. - /// <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent) - /// change to the cost matrix on the given edge (by R2). - /// <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new - /// costs on the given edge. - /// <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new - /// edge into the PBQP graph (by R2). - /// <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the - /// disconnection of the given edge from the given node. - /// <li> A constructor for your derived class : to pass back a reference to - /// the solver which is using this heuristic. - /// </ul> - /// - /// These methods are implemented in this class for documentation purposes, - /// but will assert if called. 
- /// - /// Note that this class uses the curiously recursive template idiom to - /// forward calls to the derived class. These methods need not be made - /// virtual, and indeed probably shouldn't for performance reasons. - /// - /// You'll also need to provide NodeData and EdgeData structs in your class. - /// These can be used to attach data relevant to your heuristic to each - /// node/edge in the PBQP graph. - - template <typename HImpl> - class HeuristicBase { - private: - - typedef std::list<Graph::NodeItr> OptimalList; - - HeuristicSolverImpl<HImpl> &s; - Graph &g; - OptimalList optimalList; - - // Return a reference to the derived heuristic. - HImpl& impl() { return static_cast<HImpl&>(*this); } - - // Add the given node to the optimal reductions list. Keep an iterator to - // its location for fast removal. - void addToOptimalReductionList(Graph::NodeItr nItr) { - optimalList.insert(optimalList.end(), nItr); - } - - public: - - /// \brief Construct an instance with a reference to the given solver. - /// @param solver The solver which is using this heuristic instance. - HeuristicBase(HeuristicSolverImpl<HImpl> &solver) - : s(solver), g(s.getGraph()) { } - - /// \brief Get the solver which is using this heuristic instance. - /// @return The solver which is using this heuristic instance. - /// - /// You can use this method to get access to the solver in your derived - /// heuristic implementation. - HeuristicSolverImpl<HImpl>& getSolver() { return s; } - - /// \brief Get the graph representing the problem to be solved. - /// @return The graph representing the problem to be solved. - Graph& getGraph() { return g; } - - /// \brief Tell the solver to simplify the graph before the reduction phase. - /// @return Whether or not the solver should run a simplification phase - /// prior to the main setup and reduction. - /// - /// HeuristicBase returns true from this method as it's a sensible default, - /// however you can over-ride it in your derived class if you want different - /// behaviour. - bool solverRunSimplify() const { return true; } - - /// \brief Decide whether a node should be optimally or heuristically - /// reduced. - /// @return Whether or not the given node should be listed for optimal - /// reduction (via R0, R1 or R2). - /// - /// HeuristicBase returns true for any node with degree less than 3. This is - /// sane and sensible for many situations, but not all. You can over-ride - /// this method in your derived class if you want a different selection - /// criteria. Note however that your criteria for selecting optimal nodes - /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or - /// higher should not be selected under any circumstances. - bool shouldOptimallyReduce(Graph::NodeItr nItr) { - if (g.getNodeDegree(nItr) < 3) - return true; - // else - return false; - } - - /// \brief Add the given node to the list of nodes to be optimally reduced. - /// @return nItr Node iterator to be added. - /// - /// You probably don't want to over-ride this, except perhaps to record - /// statistics before calling this implementation. HeuristicBase relies on - /// its behaviour. - void addToOptimalReduceList(Graph::NodeItr nItr) { - optimalList.push_back(nItr); - } - - /// \brief Initialise the heuristic. - /// - /// HeuristicBase iterates over all nodes in the problem and adds them to - /// the appropriate list using addToOptimalReduceList or - /// addToHeuristicReduceList based on the result of shouldOptimallyReduce. 
- /// - /// This behaviour should be fine for most situations. - void setup() { - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - if (impl().shouldOptimallyReduce(nItr)) { - addToOptimalReduceList(nItr); - } else { - impl().addToHeuristicReduceList(nItr); - } - } - } - - /// \brief Optimally reduce one of the nodes in the optimal reduce list. - /// @return True if a reduction takes place, false if the optimal reduce - /// list is empty. - /// - /// Selects a node from the optimal reduce list and removes it, applying - /// R0, R1 or R2 as appropriate based on the selected node's degree. - bool optimalReduce() { - if (optimalList.empty()) - return false; - - Graph::NodeItr nItr = optimalList.front(); - optimalList.pop_front(); - - switch (s.getSolverDegree(nItr)) { - case 0: s.applyR0(nItr); break; - case 1: s.applyR1(nItr); break; - case 2: s.applyR2(nItr); break; - default: assert(false && - "Optimal reductions of degree > 2 nodes is invalid."); - } - - return true; - } - - /// \brief Perform the PBQP reduction process. - /// - /// Reduces the problem to the empty graph by repeated application of the - /// reduction rules R0, R1, R2 and RN. - /// R0, R1 or R2 are always applied if possible before RN is used. - void reduce() { - bool finished = false; - - while (!finished) { - if (!optimalReduce()) { - if (impl().heuristicReduce()) { - getSolver().recordRN(); - } else { - finished = true; - } - } - } - } - - /// \brief Add a node to the heuristic reduce list. - /// @param nItr Node iterator to add to the heuristic reduce list. - void addToHeuristicList(Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Heuristically reduce one of the nodes in the heuristic - /// reduce list. - /// @return True if a reduction takes place, false if the heuristic reduce - /// list is empty. - void heuristicReduce() { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Prepare a change in the costs on the given edge. - /// @param eItr Edge iterator. - void preUpdateEdgeCosts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle the change in the costs on the given edge. - /// @param eItr Edge iterator. - void postUpdateEdgeCostts(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle the addition of a new edge into the PBQP graph. - /// @param eItr Edge iterator for the added edge. - void handleAddEdge(Graph::EdgeItr eItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Handle disconnection of an edge from a node. - /// @param eItr Edge iterator for edge being disconnected. - /// @param nItr Node iterator for the node being disconnected from. - /// - /// Edges are frequently removed due to the removal of a node. This - /// method allows for the effect to be computed only for the remaining - /// node in the graph. - void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - assert(false && "Must be implemented in derived class."); - } - - /// \brief Clean up any structures used by HeuristicBase. - /// - /// At present this just performs a sanity check: that the optimal reduce - /// list is empty now that reduction has completed. - /// - /// If your derived class has more complex structures which need tearing - /// down you should over-ride this method but include a call back to this - /// implementation. 
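[Editorial aside] Pulling the CRTP contract together: a derived heuristic supplies NodeData/EdgeData types plus the hooks the solver calls back into. Note that the names actually invoked are addToHeuristicReduceList() (see setup() above) and postUpdateEdgeCosts() (see applyR2() in HeuristicSolver.h below), which differ slightly from the spellings in the class comment, and heuristicReduce() must return bool because reduce() branches on its result. A skeletal, illustrative derived class, not an in-tree one:

  struct MyHeuristic : public PBQP::HeuristicBase<MyHeuristic> {
    struct NodeData {};                                  // per-node scratch state
    struct EdgeData {};                                  // per-edge scratch state

    MyHeuristic(PBQP::HeuristicSolverImpl<MyHeuristic> &S)
      : PBQP::HeuristicBase<MyHeuristic>(S) {}

    void addToHeuristicReduceList(PBQP::Graph::NodeItr) {}   // record high-degree nodes
    bool heuristicReduce() { return false; }                 // nothing left to reduce
    void preUpdateEdgeCosts(PBQP::Graph::EdgeItr) {}
    void postUpdateEdgeCosts(PBQP::Graph::EdgeItr) {}
    void handleAddEdge(PBQP::Graph::EdgeItr) {}
    void handleRemoveEdge(PBQP::Graph::EdgeItr, PBQP::Graph::NodeItr) {}
  };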
- void cleanup() { - assert(optimalList.empty() && "Nodes left over in optimal reduce list?"); - } - - }; - -} - - -#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h deleted file mode 100644 index 35514f967478..000000000000 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ /dev/null @@ -1,616 +0,0 @@ -//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Heuristic PBQP solver. This solver is able to perform optimal reductions for -// nodes of degree 0, 1 or 2. For nodes of degree >2 a plugable heuristic is -// used to select a node for reduction. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H -#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H - -#include "Graph.h" -#include "Solution.h" -#include <vector> -#include <limits> - -namespace PBQP { - - /// \brief Heuristic PBQP solver implementation. - /// - /// This class should usually be created (and destroyed) indirectly via a call - /// to HeuristicSolver<HImpl>::solve(Graph&). - /// See the comments for HeuristicSolver. - /// - /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules, - /// backpropagation phase, and maintains the internal copy of the graph on - /// which the reduction is carried out (the original being kept to facilitate - /// backpropagation). - template <typename HImpl> - class HeuristicSolverImpl { - private: - - typedef typename HImpl::NodeData HeuristicNodeData; - typedef typename HImpl::EdgeData HeuristicEdgeData; - - typedef std::list<Graph::EdgeItr> SolverEdges; - - public: - - /// \brief Iterator type for edges in the solver graph. 
- typedef SolverEdges::iterator SolverEdgeItr; - - private: - - class NodeData { - public: - NodeData() : solverDegree(0) {} - - HeuristicNodeData& getHeuristicData() { return hData; } - - SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) { - ++solverDegree; - return solverEdges.insert(solverEdges.end(), eItr); - } - - void removeSolverEdge(SolverEdgeItr seItr) { - --solverDegree; - solverEdges.erase(seItr); - } - - SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); } - SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); } - unsigned getSolverDegree() const { return solverDegree; } - void clearSolverEdges() { - solverDegree = 0; - solverEdges.clear(); - } - - private: - HeuristicNodeData hData; - unsigned solverDegree; - SolverEdges solverEdges; - }; - - class EdgeData { - public: - HeuristicEdgeData& getHeuristicData() { return hData; } - - void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) { - this->n1SolverEdgeItr = n1SolverEdgeItr; - } - - SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; } - - void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){ - this->n2SolverEdgeItr = n2SolverEdgeItr; - } - - SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; } - - private: - - HeuristicEdgeData hData; - SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr; - }; - - Graph &g; - HImpl h; - Solution s; - std::vector<Graph::NodeItr> stack; - - typedef std::list<NodeData> NodeDataList; - NodeDataList nodeDataList; - - typedef std::list<EdgeData> EdgeDataList; - EdgeDataList edgeDataList; - - public: - - /// \brief Construct a heuristic solver implementation to solve the given - /// graph. - /// @param g The graph representing the problem instance to be solved. - HeuristicSolverImpl(Graph &g) : g(g), h(*this) {} - - /// \brief Get the graph being solved by this solver. - /// @return The graph representing the problem instance being solved by this - /// solver. - Graph& getGraph() { return g; } - - /// \brief Get the heuristic data attached to the given node. - /// @param nItr Node iterator. - /// @return The heuristic data attached to the given node. - HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).getHeuristicData(); - } - - /// \brief Get the heuristic data attached to the given edge. - /// @param eItr Edge iterator. - /// @return The heuristic data attached to the given node. - HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { - return getSolverEdgeData(eItr).getHeuristicData(); - } - - /// \brief Begin iterator for the set of edges adjacent to the given node in - /// the solver graph. - /// @param nItr Node iterator. - /// @return Begin iterator for the set of edges adjacent to the given node - /// in the solver graph. - SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).solverEdgesBegin(); - } - - /// \brief End iterator for the set of edges adjacent to the given node in - /// the solver graph. - /// @param nItr Node iterator. - /// @return End iterator for the set of edges adjacent to the given node in - /// the solver graph. - SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).solverEdgesEnd(); - } - - /// \brief Remove a node from the solver graph. - /// @param eItr Edge iterator for edge to be removed. - /// - /// Does <i>not</i> notify the heuristic of the removal. That should be - /// done manually if necessary. 
- void removeSolverEdge(Graph::EdgeItr eItr) { - EdgeData &eData = getSolverEdgeData(eItr); - NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), - &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - - n1Data.removeSolverEdge(eData.getN1SolverEdgeItr()); - n2Data.removeSolverEdge(eData.getN2SolverEdgeItr()); - } - - /// \brief Compute a solution to the PBQP problem instance with which this - /// heuristic solver was constructed. - /// @return A solution to the PBQP problem. - /// - /// Performs the full PBQP heuristic solver algorithm, including setup, - /// calls to the heuristic (which will call back to the reduction rules in - /// this class), and cleanup. - Solution computeSolution() { - setup(); - h.setup(); - h.reduce(); - backpropagate(); - h.cleanup(); - cleanup(); - return s; - } - - /// \brief Add to the end of the stack. - /// @param nItr Node iterator to add to the reduction stack. - void pushToStack(Graph::NodeItr nItr) { - getSolverNodeData(nItr).clearSolverEdges(); - stack.push_back(nItr); - } - - /// \brief Returns the solver degree of the given node. - /// @param nItr Node iterator for which degree is requested. - /// @return Node degree in the <i>solver</i> graph (not the original graph). - unsigned getSolverDegree(Graph::NodeItr nItr) { - return getSolverNodeData(nItr).getSolverDegree(); - } - - /// \brief Set the solution of the given node. - /// @param nItr Node iterator to set solution for. - /// @param selection Selection for node. - void setSolution(const Graph::NodeItr &nItr, unsigned selection) { - s.setSelection(nItr, selection); - - for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), - aeEnd = g.adjEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - Graph::EdgeItr eItr(*aeItr); - Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); - getSolverNodeData(anItr).addSolverEdge(eItr); - } - } - - /// \brief Apply rule R0. - /// @param nItr Node iterator for node to apply R0 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR0(Graph::NodeItr nItr) { - assert(getSolverNodeData(nItr).getSolverDegree() == 0 && - "R0 applied to node with degree != 0."); - - // Nothing to do. Just push the node onto the reduction stack. - pushToStack(nItr); - - s.recordR0(); - } - - /// \brief Apply rule R1. - /// @param xnItr Node iterator for node to apply R1 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR1(Graph::NodeItr xnItr) { - NodeData &nd = getSolverNodeData(xnItr); - assert(nd.getSolverDegree() == 1 && - "R1 applied to node with degree != 1."); - - Graph::EdgeItr eItr = *nd.solverEdgesBegin(); - - const Matrix &eCosts = g.getEdgeCosts(eItr); - const Vector &xCosts = g.getNodeCosts(xnItr); - - // Duplicate a little to avoid transposing matrices. 
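[Editorial aside] Both branches that follow implement the same R1 rule with the edge matrix oriented one way or the other. With E's rows indexing the degree-1 node x and its columns indexing the neighbour y, the update is y[j] += min over i of (E[i][j] + x[i]); x and the edge are then removed, and x's own choice is recovered in backpropagate() once y's selection is known. Worked example: x = (0, 2) and E = [[0, inf], [inf, 0]] give y[0] += min(0+0, inf+2) = 0 and y[1] += min(inf+0, 0+2) = 2.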
- if (xnItr == g.getEdgeNode1(eItr)) { - Graph::NodeItr ynItr = g.getEdgeNode2(eItr); - Vector &yCosts = g.getNodeCosts(ynItr); - for (unsigned j = 0; j < yCosts.getLength(); ++j) { - PBQPNum min = eCosts[0][j] + xCosts[0]; - for (unsigned i = 1; i < xCosts.getLength(); ++i) { - PBQPNum c = eCosts[i][j] + xCosts[i]; - if (c < min) - min = c; - } - yCosts[j] += min; - } - h.handleRemoveEdge(eItr, ynItr); - } else { - Graph::NodeItr ynItr = g.getEdgeNode1(eItr); - Vector &yCosts = g.getNodeCosts(ynItr); - for (unsigned i = 0; i < yCosts.getLength(); ++i) { - PBQPNum min = eCosts[i][0] + xCosts[0]; - for (unsigned j = 1; j < xCosts.getLength(); ++j) { - PBQPNum c = eCosts[i][j] + xCosts[j]; - if (c < min) - min = c; - } - yCosts[i] += min; - } - h.handleRemoveEdge(eItr, ynItr); - } - removeSolverEdge(eItr); - assert(nd.getSolverDegree() == 0 && - "Degree 1 with edge removed should be 0."); - pushToStack(xnItr); - s.recordR1(); - } - - /// \brief Apply rule R2. - /// @param xnItr Node iterator for node to apply R2 to. - /// - /// Node will be automatically pushed to the solver stack. - void applyR2(Graph::NodeItr xnItr) { - assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && - "R2 applied to node with degree != 2."); - - NodeData &nd = getSolverNodeData(xnItr); - const Vector &xCosts = g.getNodeCosts(xnItr); - - SolverEdgeItr aeItr = nd.solverEdgesBegin(); - Graph::EdgeItr yxeItr = *aeItr, - zxeItr = *(++aeItr); - - Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), - znItr = g.getEdgeOtherNode(zxeItr, xnItr); - - bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), - flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); - - const Matrix *yxeCosts = flipEdge1 ? - new Matrix(g.getEdgeCosts(yxeItr).transpose()) : - &g.getEdgeCosts(yxeItr); - - const Matrix *zxeCosts = flipEdge2 ? - new Matrix(g.getEdgeCosts(zxeItr).transpose()) : - &g.getEdgeCosts(zxeItr); - - unsigned xLen = xCosts.getLength(), - yLen = yxeCosts->getRows(), - zLen = zxeCosts->getRows(); - - Matrix delta(yLen, zLen); - - for (unsigned i = 0; i < yLen; ++i) { - for (unsigned j = 0; j < zLen; ++j) { - PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; - for (unsigned k = 1; k < xLen; ++k) { - PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; - if (c < min) { - min = c; - } - } - delta[i][j] = min; - } - } - - if (flipEdge1) - delete yxeCosts; - - if (flipEdge2) - delete zxeCosts; - - Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); - bool addedEdge = false; - - if (yzeItr == g.edgesEnd()) { - yzeItr = g.addEdge(ynItr, znItr, delta); - addedEdge = true; - } else { - Matrix &yzeCosts = g.getEdgeCosts(yzeItr); - h.preUpdateEdgeCosts(yzeItr); - if (ynItr == g.getEdgeNode1(yzeItr)) { - yzeCosts += delta; - } else { - yzeCosts += delta.transpose(); - } - } - - bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); - - if (!addedEdge) { - // If we modified the edge costs let the heuristic know. - h.postUpdateEdgeCosts(yzeItr); - } - - if (nullCostEdge) { - // If this edge ended up null remove it. - if (!addedEdge) { - // We didn't just add it, so we need to notify the heuristic - // and remove it from the solver. - h.handleRemoveEdge(yzeItr, ynItr); - h.handleRemoveEdge(yzeItr, znItr); - removeSolverEdge(yzeItr); - } - g.removeEdge(yzeItr); - } else if (addedEdge) { - // If the edge was added, and non-null, finish setting it up, add it to - // the solver & notify heuristic. 
- edgeDataList.push_back(EdgeData()); - g.setEdgeData(yzeItr, &edgeDataList.back()); - addSolverEdge(yzeItr); - h.handleAddEdge(yzeItr); - } - - h.handleRemoveEdge(yxeItr, ynItr); - removeSolverEdge(yxeItr); - h.handleRemoveEdge(zxeItr, znItr); - removeSolverEdge(zxeItr); - - pushToStack(xnItr); - s.recordR2(); - } - - /// \brief Record an application of the RN rule. - /// - /// For use by the HeuristicBase. - void recordRN() { s.recordRN(); } - - private: - - NodeData& getSolverNodeData(Graph::NodeItr nItr) { - return *static_cast<NodeData*>(g.getNodeData(nItr)); - } - - EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) { - return *static_cast<EdgeData*>(g.getEdgeData(eItr)); - } - - void addSolverEdge(Graph::EdgeItr eItr) { - EdgeData &eData = getSolverEdgeData(eItr); - NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), - &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); - - eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr)); - eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr)); - } - - void setup() { - if (h.solverRunSimplify()) { - simplify(); - } - - // Create node data objects. - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - nodeDataList.push_back(NodeData()); - g.setNodeData(nItr, &nodeDataList.back()); - } - - // Create edge data objects. - for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); - eItr != eEnd; ++eItr) { - edgeDataList.push_back(EdgeData()); - g.setEdgeData(eItr, &edgeDataList.back()); - addSolverEdge(eItr); - } - } - - void simplify() { - disconnectTrivialNodes(); - eliminateIndependentEdges(); - } - - // Eliminate trivial nodes. - void disconnectTrivialNodes() { - unsigned numDisconnected = 0; - - for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { - - if (g.getNodeCosts(nItr).getLength() == 1) { - - std::vector<Graph::EdgeItr> edgesToRemove; - - for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), - aeEnd = g.adjEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - - Graph::EdgeItr eItr = *aeItr; - - if (g.getEdgeNode1(eItr) == nItr) { - Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(eItr).getRowAsVector(0); - } - else { - Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr); - g.getNodeCosts(otherNodeItr) += - g.getEdgeCosts(eItr).getColAsVector(0); - } - - edgesToRemove.push_back(eItr); - } - - if (!edgesToRemove.empty()) - ++numDisconnected; - - while (!edgesToRemove.empty()) { - g.removeEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); - } - } - } - } - - void eliminateIndependentEdges() { - std::vector<Graph::EdgeItr> edgesToProcess; - unsigned numEliminated = 0; - - for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd(); - eItr != eEnd; ++eItr) { - edgesToProcess.push_back(eItr); - } - - while (!edgesToProcess.empty()) { - if (tryToEliminateEdge(edgesToProcess.back())) - ++numEliminated; - edgesToProcess.pop_back(); - } - } - - bool tryToEliminateEdge(Graph::EdgeItr eItr) { - if (tryNormaliseEdgeMatrix(eItr)) { - g.removeEdge(eItr); - return true; - } - return false; - } - - bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { - - const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); - - Matrix &edgeCosts = g.getEdgeCosts(eItr); - Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), - &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = infinity; - - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - if 
(vCosts[c] != infinity && edgeCosts[r][c] < rowMin) - rowMin = edgeCosts[r][c]; - } - - uCosts[r] += rowMin; - - if (rowMin != infinity) { - edgeCosts.subFromRow(r, rowMin); - } - else { - edgeCosts.setRow(r, 0); - } - } - - for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = infinity; - - for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) - colMin = edgeCosts[r][c]; - } - - vCosts[c] += colMin; - - if (colMin != infinity) { - edgeCosts.subFromCol(c, colMin); - } - else { - edgeCosts.setCol(c, 0); - } - } - - return edgeCosts.isZero(); - } - - void backpropagate() { - while (!stack.empty()) { - computeSolution(stack.back()); - stack.pop_back(); - } - } - - void computeSolution(Graph::NodeItr nItr) { - - NodeData &nodeData = getSolverNodeData(nItr); - - Vector v(g.getNodeCosts(nItr)); - - // Solve based on existing solved edges. - for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(), - solvedEdgeEnd = nodeData.solverEdgesEnd(); - solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) { - - Graph::EdgeItr eItr(*solvedEdgeItr); - Matrix &edgeCosts = g.getEdgeCosts(eItr); - - if (nItr == g.getEdgeNode1(eItr)) { - Graph::NodeItr adjNode(g.getEdgeNode2(eItr)); - unsigned adjSolution = s.getSelection(adjNode); - v += edgeCosts.getColAsVector(adjSolution); - } - else { - Graph::NodeItr adjNode(g.getEdgeNode1(eItr)); - unsigned adjSolution = s.getSelection(adjNode); - v += edgeCosts.getRowAsVector(adjSolution); - } - - } - - setSolution(nItr, v.minIndex()); - } - - void cleanup() { - h.cleanup(); - nodeDataList.clear(); - edgeDataList.clear(); - } - }; - - /// \brief PBQP heuristic solver class. - /// - /// Given a PBQP Graph g representing a PBQP problem, you can find a solution - /// by calling - /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt> - /// - /// The choice of heuristic for the H parameter will affect both the solver - /// speed and solution quality. The heuristic should be chosen based on the - /// nature of the problem being solved. - /// Currently the only solver included with LLVM is the Briggs heuristic for - /// register allocation. - template <typename HImpl> - class HeuristicSolver { - public: - static Solution solve(Graph &g) { - HeuristicSolverImpl<HImpl> hs(g); - return hs.computeSolution(); - } - }; - -} - -#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h deleted file mode 100644 index 18eaf7c0da9b..000000000000 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ /dev/null @@ -1,460 +0,0 @@ -//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class implements the Briggs test for "allocability" of nodes in a -// PBQP graph representing a register allocation problem. Nodes which can be -// proven allocable (by a safe and relatively accurate test) are removed from -// the PBQP graph first. If no provably allocable node is present in the graph -// then the node with the minimal spill-cost to degree ratio is removed. 
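[Editorial aside] For completeness, the removed solver was driven exactly as the HeuristicSolver comment above describes, with Briggs (whose header deletion starts just below) as the only in-tree heuristic; a sketch, assuming the headers' post-move include paths under include/llvm/CodeGen/PBQP:

  #include "llvm/CodeGen/PBQP/HeuristicSolver.h"
  #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"

  PBQP::Solution solveWithBriggs(PBQP::Graph &g) {
    // Runs setup, the R0/R1/R2 + heuristic reduction loop, and backpropagation.
    PBQP::Solution s = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(g);
    return s;                     // s.getSelection(nItr) yields each node's chosen option
  }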
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H - -#include "../HeuristicSolver.h" -#include "../HeuristicBase.h" - -#include <set> -#include <limits> - -namespace PBQP { - namespace Heuristics { - - /// \brief PBQP Heuristic which applies an allocability test based on - /// Briggs. - /// - /// This heuristic assumes that the elements of cost vectors in the PBQP - /// problem represent storage options, with the first being the spill - /// option and subsequent elements representing legal registers for the - /// corresponding node. Edge cost matrices are likewise assumed to represent - /// register constraints. - /// If one or more nodes can be proven allocable by this heuristic (by - /// inspection of their constraint matrices) then the allocable node of - /// highest degree is selected for the next reduction and pushed to the - /// solver stack. If no nodes can be proven allocable then the node with - /// the lowest estimated spill cost is selected and push to the solver stack - /// instead. - /// - /// This implementation is built on top of HeuristicBase. - class Briggs : public HeuristicBase<Briggs> { - private: - - class LinkDegreeComparator { - public: - LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {} - bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { - if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr)) - return true; - return false; - } - private: - HeuristicSolverImpl<Briggs> *s; - }; - - class SpillCostComparator { - public: - SpillCostComparator(HeuristicSolverImpl<Briggs> &s) - : s(&s), g(&s.getGraph()) {} - bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const { - PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr), - cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr); - if (cost1 < cost2) - return true; - return false; - } - - private: - HeuristicSolverImpl<Briggs> *s; - Graph *g; - }; - - typedef std::list<Graph::NodeItr> RNAllocableList; - typedef RNAllocableList::iterator RNAllocableListItr; - - typedef std::list<Graph::NodeItr> RNUnallocableList; - typedef RNUnallocableList::iterator RNUnallocableListItr; - - public: - - struct NodeData { - typedef std::vector<unsigned> UnsafeDegreesArray; - bool isHeuristic, isAllocable, isInitialized; - unsigned numDenied, numSafe; - UnsafeDegreesArray unsafeDegrees; - RNAllocableListItr rnaItr; - RNUnallocableListItr rnuItr; - - NodeData() - : isHeuristic(false), isAllocable(false), isInitialized(false), - numDenied(0), numSafe(0) { } - }; - - struct EdgeData { - typedef std::vector<unsigned> UnsafeArray; - unsigned worst, reverseWorst; - UnsafeArray unsafe, reverseUnsafe; - bool isUpToDate; - - EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {} - }; - - /// \brief Construct an instance of the Briggs heuristic. - /// @param solver A reference to the solver which is using this heuristic. - Briggs(HeuristicSolverImpl<Briggs> &solver) : - HeuristicBase<Briggs>(solver) {} - - /// \brief Determine whether a node should be reduced using optimal - /// reduction. - /// @param nItr Node iterator to be considered. - /// @return True if the given node should be optimally reduced, false - /// otherwise. - /// - /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one - /// exception. Nodes whose spill cost (element 0 of their cost vector) is - /// infinite are checked for allocability first. 
Allocable nodes may be - /// optimally reduced, but nodes whose allocability cannot be proven are - /// selected for heuristic reduction instead. - bool shouldOptimallyReduce(Graph::NodeItr nItr) { - if (getSolver().getSolverDegree(nItr) < 3) { - return true; - } - // else - return false; - } - - /// \brief Add a node to the heuristic reduce list. - /// @param nItr Node iterator to add to the heuristic reduce list. - void addToHeuristicReduceList(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - initializeNode(nItr); - nd.isHeuristic = true; - if (nd.isAllocable) { - nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); - } else { - nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); - } - } - - /// \brief Heuristically reduce one of the nodes in the heuristic - /// reduce list. - /// @return True if a reduction takes place, false if the heuristic reduce - /// list is empty. - /// - /// If the list of allocable nodes is non-empty a node is selected - /// from it and pushed to the stack. Otherwise if the non-allocable list - /// is non-empty a node is selected from it and pushed to the stack. - /// If both lists are empty the method simply returns false with no action - /// taken. - bool heuristicReduce() { - if (!rnAllocableList.empty()) { - RNAllocableListItr rnaItr = - min_element(rnAllocableList.begin(), rnAllocableList.end(), - LinkDegreeComparator(getSolver())); - Graph::NodeItr nItr = *rnaItr; - rnAllocableList.erase(rnaItr); - handleRemoveNode(nItr); - getSolver().pushToStack(nItr); - return true; - } else if (!rnUnallocableList.empty()) { - RNUnallocableListItr rnuItr = - min_element(rnUnallocableList.begin(), rnUnallocableList.end(), - SpillCostComparator(getSolver())); - Graph::NodeItr nItr = *rnuItr; - rnUnallocableList.erase(rnuItr); - handleRemoveNode(nItr); - getSolver().pushToStack(nItr); - return true; - } - // else - return false; - } - - /// \brief Prepare a change in the costs on the given edge. - /// @param eItr Edge iterator. - void preUpdateEdgeCosts(Graph::EdgeItr eItr) { - Graph &g = getGraph(); - Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), - n2Itr = g.getEdgeNode2(eItr); - NodeData &n1 = getHeuristicNodeData(n1Itr), - &n2 = getHeuristicNodeData(n2Itr); - - if (n1.isHeuristic) - subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); - if (n2.isHeuristic) - subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); - - EdgeData &ed = getHeuristicEdgeData(eItr); - ed.isUpToDate = false; - } - - /// \brief Handle the change in the costs on the given edge. - /// @param eItr Edge iterator. - void postUpdateEdgeCosts(Graph::EdgeItr eItr) { - // This is effectively the same as adding a new edge now, since - // we've factored out the costs of the old one. - handleAddEdge(eItr); - } - - /// \brief Handle the addition of a new edge into the PBQP graph. - /// @param eItr Edge iterator for the added edge. - /// - /// Updates allocability of any nodes connected by this edge which are - /// being managed by the heuristic. If allocability changes they are - /// moved to the appropriate list. - void handleAddEdge(Graph::EdgeItr eItr) { - Graph &g = getGraph(); - Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), - n2Itr = g.getEdgeNode2(eItr); - NodeData &n1 = getHeuristicNodeData(n1Itr), - &n2 = getHeuristicNodeData(n2Itr); - - // If neither node is managed by the heuristic there's nothing to be - // done. - if (!n1.isHeuristic && !n2.isHeuristic) - return; - - // Ok - we need to update at least one node. 
- computeEdgeContributions(eItr); - - // Update node 1 if it's managed by the heuristic. - if (n1.isHeuristic) { - bool n1WasAllocable = n1.isAllocable; - addEdgeContributions(eItr, n1Itr); - updateAllocability(n1Itr); - if (n1WasAllocable && !n1.isAllocable) { - rnAllocableList.erase(n1.rnaItr); - n1.rnuItr = - rnUnallocableList.insert(rnUnallocableList.end(), n1Itr); - } - } - - // Likewise for node 2. - if (n2.isHeuristic) { - bool n2WasAllocable = n2.isAllocable; - addEdgeContributions(eItr, n2Itr); - updateAllocability(n2Itr); - if (n2WasAllocable && !n2.isAllocable) { - rnAllocableList.erase(n2.rnaItr); - n2.rnuItr = - rnUnallocableList.insert(rnUnallocableList.end(), n2Itr); - } - } - } - - /// \brief Handle disconnection of an edge from a node. - /// @param eItr Edge iterator for edge being disconnected. - /// @param nItr Node iterator for the node being disconnected from. - /// - /// Updates allocability of the given node and, if appropriate, moves the - /// node to a new list. - void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - - // If the node is not managed by the heuristic there's nothing to be - // done. - if (!nd.isHeuristic) - return; - - EdgeData &ed = getHeuristicEdgeData(eItr); - (void)ed; - assert(ed.isUpToDate && "Edge data is not up to date."); - - // Update node. - bool ndWasAllocable = nd.isAllocable; - subtractEdgeContributions(eItr, nItr); - updateAllocability(nItr); - - // If the node has gone optimal... - if (shouldOptimallyReduce(nItr)) { - nd.isHeuristic = false; - addToOptimalReduceList(nItr); - if (ndWasAllocable) { - rnAllocableList.erase(nd.rnaItr); - } else { - rnUnallocableList.erase(nd.rnuItr); - } - } else { - // Node didn't go optimal, but we might have to move it - // from "unallocable" to "allocable". - if (!ndWasAllocable && nd.isAllocable) { - rnUnallocableList.erase(nd.rnuItr); - nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); - } - } - } - - private: - - NodeData& getHeuristicNodeData(Graph::NodeItr nItr) { - return getSolver().getHeuristicNodeData(nItr); - } - - EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) { - return getSolver().getHeuristicEdgeData(eItr); - } - - // Work out what this edge will contribute to the allocability of the - // nodes connected to it. - void computeEdgeContributions(Graph::EdgeItr eItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - if (ed.isUpToDate) - return; // Edge data is already up to date. - - Matrix &eCosts = getGraph().getEdgeCosts(eItr); - - unsigned numRegs = eCosts.getRows() - 1, - numReverseRegs = eCosts.getCols() - 1; - - std::vector<unsigned> rowInfCounts(numRegs, 0), - colInfCounts(numReverseRegs, 0); - - ed.worst = 0; - ed.reverseWorst = 0; - ed.unsafe.clear(); - ed.unsafe.resize(numRegs, 0); - ed.reverseUnsafe.clear(); - ed.reverseUnsafe.resize(numReverseRegs, 0); - - for (unsigned i = 0; i < numRegs; ++i) { - for (unsigned j = 0; j < numReverseRegs; ++j) { - if (eCosts[i + 1][j + 1] == - std::numeric_limits<PBQPNum>::infinity()) { - ed.unsafe[i] = 1; - ed.reverseUnsafe[j] = 1; - ++rowInfCounts[i]; - ++colInfCounts[j]; - - if (colInfCounts[j] > ed.worst) { - ed.worst = colInfCounts[j]; - } - - if (rowInfCounts[i] > ed.reverseWorst) { - ed.reverseWorst = rowInfCounts[i]; - } - } - } - } - - ed.isUpToDate = true; - } - - // Add the contributions of the given edge to the given node's - // numDenied and safe members. No action is taken other than to update - // these member values. 
Once updated these numbers can be used by clients - // to update the node's allocability. - void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - assert(ed.isUpToDate && "Using out-of-date edge numbers."); - - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); - EdgeData::UnsafeArray &unsafe = - nIsNode1 ? ed.unsafe : ed.reverseUnsafe; - nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; - - for (unsigned r = 0; r < numRegs; ++r) { - if (unsafe[r]) { - if (nd.unsafeDegrees[r]==0) { - --nd.numSafe; - } - ++nd.unsafeDegrees[r]; - } - } - } - - // Subtract the contributions of the given edge to the given node's - // numDenied and safe members. No action is taken other than to update - // these member values. Once updated these numbers can be used by clients - // to update the node's allocability. - void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { - EdgeData &ed = getHeuristicEdgeData(eItr); - - assert(ed.isUpToDate && "Using out-of-date edge numbers."); - - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); - EdgeData::UnsafeArray &unsafe = - nIsNode1 ? ed.unsafe : ed.reverseUnsafe; - nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; - - for (unsigned r = 0; r < numRegs; ++r) { - if (unsafe[r]) { - if (nd.unsafeDegrees[r] == 1) { - ++nd.numSafe; - } - --nd.unsafeDegrees[r]; - } - } - } - - void updateAllocability(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; - } - - void initializeNode(Graph::NodeItr nItr) { - NodeData &nd = getHeuristicNodeData(nItr); - - if (nd.isInitialized) - return; // Node data is already up to date. 
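The add/subtract edge-contribution routines above maintain two counters per node: numDenied, the sum over incident edges of the worst-case number of registers an adjacent node can deny, and numSafe, the number of registers that no incident edge marks unsafe. updateAllocability then declares the node allocable when numDenied < numRegs or at least one safe register remains. A small self-contained sketch of that bookkeeping, using plain structs rather than the heuristic's node/edge data (NodeInfo, EdgeInfo and addEdge are illustrative names, not LLVM types):

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative per-edge data: 'worst' is the largest number of registers an
// adjacent node can deny this node through the edge; 'unsafe[r]' is non-zero
// if register r can be made unusable by some choice at the adjacent node.
struct EdgeInfo {
  unsigned worst = 0;
  std::vector<unsigned> unsafe; // one flag per register
};

struct NodeInfo {
  unsigned numDenied = 0;              // sum of incident edges' worst values
  unsigned numSafe = 0;                // registers no incident edge marks unsafe
  std::vector<unsigned> unsafeDegrees; // how many edges mark each register unsafe
  bool isAllocable = false;
};

static void addEdge(NodeInfo &n, const EdgeInfo &e) {
  n.numDenied += e.worst;
  for (std::size_t r = 0; r < e.unsafe.size(); ++r)
    if (e.unsafe[r]) {
      if (n.unsafeDegrees[r] == 0)
        --n.numSafe;
      ++n.unsafeDegrees[r];
    }
}

static void updateAllocability(NodeInfo &n, unsigned numRegs) {
  n.isAllocable = n.numDenied < numRegs || n.numSafe > 0;
}

int main() {
  const unsigned numRegs = 2;
  NodeInfo n;
  n.numSafe = numRegs;
  n.unsafeDegrees.assign(numRegs, 0);

  // One neighbour that can deny at most one register and only threatens r0:
  // the node stays allocable because numDenied (1) < numRegs (2).
  addEdge(n, EdgeInfo{1, {1, 0}});
  updateAllocability(n, numRegs);
  std::cout << "allocable after one edge: " << n.isAllocable << "\n"; // 1

  // A second such neighbour pushes numDenied to 2, but r1 is still safe, so
  // the Briggs test still proves the node allocable.
  addEdge(n, EdgeInfo{1, {1, 0}});
  updateAllocability(n, numRegs);
  std::cout << "allocable after two edges: " << n.isAllocable << "\n"; // 1
}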
- - unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; - - nd.numDenied = 0; - nd.numSafe = numRegs; - nd.unsafeDegrees.resize(numRegs, 0); - - typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - - for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr), - aeEnd = getSolver().solverEdgesEnd(nItr); - aeItr != aeEnd; ++aeItr) { - - Graph::EdgeItr eItr = *aeItr; - computeEdgeContributions(eItr); - addEdgeContributions(eItr, nItr); - } - - updateAllocability(nItr); - nd.isInitialized = true; - } - - void handleRemoveNode(Graph::NodeItr xnItr) { - typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr; - std::vector<Graph::EdgeItr> edgesToRemove; - for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr), - aeEnd = getSolver().solverEdgesEnd(xnItr); - aeItr != aeEnd; ++aeItr) { - Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr); - handleRemoveEdge(*aeItr, ynItr); - edgesToRemove.push_back(*aeItr); - } - while (!edgesToRemove.empty()) { - getSolver().removeSolverEdge(edgesToRemove.back()); - edgesToRemove.pop_back(); - } - } - - RNAllocableList rnAllocableList; - RNUnallocableList rnUnallocableList; - }; - - } -} - - -#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/lib/CodeGen/PBQP/Math.h b/lib/CodeGen/PBQP/Math.h deleted file mode 100644 index e7598bf3e3f1..000000000000 --- a/lib/CodeGen/PBQP/Math.h +++ /dev/null @@ -1,288 +0,0 @@ -//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_MATH_H -#define LLVM_CODEGEN_PBQP_MATH_H - -#include <cassert> -#include <algorithm> -#include <functional> - -namespace PBQP { - -typedef float PBQPNum; - -/// \brief PBQP Vector class. -class Vector { - public: - - /// \brief Construct a PBQP vector of the given size. - explicit Vector(unsigned length) : - length(length), data(new PBQPNum[length]) { - } - - /// \brief Construct a PBQP vector with initializer. - Vector(unsigned length, PBQPNum initVal) : - length(length), data(new PBQPNum[length]) { - std::fill(data, data + length, initVal); - } - - /// \brief Copy construct a PBQP vector. - Vector(const Vector &v) : - length(v.length), data(new PBQPNum[length]) { - std::copy(v.data, v.data + length, data); - } - - /// \brief Destroy this vector, return its memory. - ~Vector() { delete[] data; } - - /// \brief Assignment operator. - Vector& operator=(const Vector &v) { - delete[] data; - length = v.length; - data = new PBQPNum[length]; - std::copy(v.data, v.data + length, data); - return *this; - } - - /// \brief Return the length of the vector - unsigned getLength() const { - return length; - } - - /// \brief Element access. - PBQPNum& operator[](unsigned index) { - assert(index < length && "Vector element access out of bounds."); - return data[index]; - } - - /// \brief Const element access. - const PBQPNum& operator[](unsigned index) const { - assert(index < length && "Vector element access out of bounds."); - return data[index]; - } - - /// \brief Add another vector to this one. - Vector& operator+=(const Vector &v) { - assert(length == v.length && "Vector length mismatch."); - std::transform(data, data + length, v.data, data, std::plus<PBQPNum>()); - return *this; - } - - /// \brief Subtract another vector from this one. 
- Vector& operator-=(const Vector &v) { - assert(length == v.length && "Vector length mismatch."); - std::transform(data, data + length, v.data, data, std::minus<PBQPNum>()); - return *this; - } - - /// \brief Returns the index of the minimum value in this vector - unsigned minIndex() const { - return std::min_element(data, data + length) - data; - } - - private: - unsigned length; - PBQPNum *data; -}; - -/// \brief Output a textual representation of the given vector on the given -/// output stream. -template <typename OStream> -OStream& operator<<(OStream &os, const Vector &v) { - assert((v.getLength() != 0) && "Zero-length vector badness."); - - os << "[ " << v[0]; - for (unsigned i = 1; i < v.getLength(); ++i) { - os << ", " << v[i]; - } - os << " ]"; - - return os; -} - - -/// \brief PBQP Matrix class -class Matrix { - public: - - /// \brief Construct a PBQP Matrix with the given dimensions. - Matrix(unsigned rows, unsigned cols) : - rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { - } - - /// \brief Construct a PBQP Matrix with the given dimensions and initial - /// value. - Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : - rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { - std::fill(data, data + (rows * cols), initVal); - } - - /// \brief Copy construct a PBQP matrix. - Matrix(const Matrix &m) : - rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { - std::copy(m.data, m.data + (rows * cols), data); - } - - /// \brief Destroy this matrix, return its memory. - ~Matrix() { delete[] data; } - - /// \brief Assignment operator. - Matrix& operator=(const Matrix &m) { - delete[] data; - rows = m.rows; cols = m.cols; - data = new PBQPNum[rows * cols]; - std::copy(m.data, m.data + (rows * cols), data); - return *this; - } - - /// \brief Return the number of rows in this matrix. - unsigned getRows() const { return rows; } - - /// \brief Return the number of cols in this matrix. - unsigned getCols() const { return cols; } - - /// \brief Matrix element access. - PBQPNum* operator[](unsigned r) { - assert(r < rows && "Row out of bounds."); - return data + (r * cols); - } - - /// \brief Matrix element access. - const PBQPNum* operator[](unsigned r) const { - assert(r < rows && "Row out of bounds."); - return data + (r * cols); - } - - /// \brief Returns the given row as a vector. - Vector getRowAsVector(unsigned r) const { - Vector v(cols); - for (unsigned c = 0; c < cols; ++c) - v[c] = (*this)[r][c]; - return v; - } - - /// \brief Returns the given column as a vector. - Vector getColAsVector(unsigned c) const { - Vector v(rows); - for (unsigned r = 0; r < rows; ++r) - v[r] = (*this)[r][c]; - return v; - } - - /// \brief Reset the matrix to the given value. - Matrix& reset(PBQPNum val = 0) { - std::fill(data, data + (rows * cols), val); - return *this; - } - - /// \brief Set a single row of this matrix to the given value. - Matrix& setRow(unsigned r, PBQPNum val) { - assert(r < rows && "Row out of bounds."); - std::fill(data + (r * cols), data + ((r + 1) * cols), val); - return *this; - } - - /// \brief Set a single column of this matrix to the given value. - Matrix& setCol(unsigned c, PBQPNum val) { - assert(c < cols && "Column out of bounds."); - for (unsigned r = 0; r < rows; ++r) - (*this)[r][c] = val; - return *this; - } - - /// \brief Matrix transpose. 
- Matrix transpose() const { - Matrix m(cols, rows); - for (unsigned r = 0; r < rows; ++r) - for (unsigned c = 0; c < cols; ++c) - m[c][r] = (*this)[r][c]; - return m; - } - - /// \brief Returns the diagonal of the matrix as a vector. - /// - /// Matrix must be square. - Vector diagonalize() const { - assert(rows == cols && "Attempt to diagonalize non-square matrix."); - - Vector v(rows); - for (unsigned r = 0; r < rows; ++r) - v[r] = (*this)[r][r]; - return v; - } - - /// \brief Add the given matrix to this one. - Matrix& operator+=(const Matrix &m) { - assert(rows == m.rows && cols == m.cols && - "Matrix dimensions mismatch."); - std::transform(data, data + (rows * cols), m.data, data, - std::plus<PBQPNum>()); - return *this; - } - - /// \brief Returns the minimum of the given row - PBQPNum getRowMin(unsigned r) const { - assert(r < rows && "Row out of bounds"); - return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); - } - - /// \brief Returns the minimum of the given column - PBQPNum getColMin(unsigned c) const { - PBQPNum minElem = (*this)[0][c]; - for (unsigned r = 1; r < rows; ++r) - if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; - return minElem; - } - - /// \brief Subtracts the given scalar from the elements of the given row. - Matrix& subFromRow(unsigned r, PBQPNum val) { - assert(r < rows && "Row out of bounds"); - std::transform(data + (r * cols), data + ((r + 1) * cols), - data + (r * cols), - std::bind2nd(std::minus<PBQPNum>(), val)); - return *this; - } - - /// \brief Subtracts the given scalar from the elements of the given column. - Matrix& subFromCol(unsigned c, PBQPNum val) { - for (unsigned r = 0; r < rows; ++r) - (*this)[r][c] -= val; - return *this; - } - - /// \brief Returns true if this is a zero matrix. - bool isZero() const { - return find_if(data, data + (rows * cols), - std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) == - data + (rows * cols); - } - - private: - unsigned rows, cols; - PBQPNum *data; -}; - -/// \brief Output a textual representation of the given matrix on the given -/// output stream. -template <typename OStream> -OStream& operator<<(OStream &os, const Matrix &m) { - - assert((m.getRows() != 0) && "Zero-row matrix badness."); - - for (unsigned i = 0; i < m.getRows(); ++i) { - os << m.getRowAsVector(i); - } - - return os; -} - -} - -#endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h deleted file mode 100644 index 047fd04c7cb8..000000000000 --- a/lib/CodeGen/PBQP/Solution.h +++ /dev/null @@ -1,89 +0,0 @@ -//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// PBQP Solution class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H -#define LLVM_CODEGEN_PBQP_SOLUTION_H - -#include "Math.h" -#include "Graph.h" - -#include <map> - -namespace PBQP { - - /// \brief Represents a solution to a PBQP problem. - /// - /// To get the selection for each node in the problem use the getSelection method. 
- class Solution { - private: - - typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap; - SelectionsMap selections; - - unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions; - - public: - - /// \brief Number of nodes for which selections have been made. - /// @return Number of nodes for which selections have been made. - unsigned numNodes() const { return selections.size(); } - - /// \brief Records a reduction via the R0 rule. Should be called from the - /// solver only. - void recordR0() { ++r0Reductions; } - - /// \brief Returns the number of R0 reductions applied to solve the problem. - unsigned numR0Reductions() const { return r0Reductions; } - - /// \brief Records a reduction via the R1 rule. Should be called from the - /// solver only. - void recordR1() { ++r1Reductions; } - - /// \brief Returns the number of R1 reductions applied to solve the problem. - unsigned numR1Reductions() const { return r1Reductions; } - - /// \brief Records a reduction via the R2 rule. Should be called from the - /// solver only. - void recordR2() { ++r2Reductions; } - - /// \brief Returns the number of R2 reductions applied to solve the problem. - unsigned numR2Reductions() const { return r2Reductions; } - - /// \brief Records a reduction via the RN rule. Should be called from the - /// solver only. - void recordRN() { ++ rNReductions; } - - /// \brief Returns the number of RN reductions applied to solve the problem. - unsigned numRNReductions() const { return rNReductions; } - - /// \brief Set the selection for a given node. - /// @param nItr Node iterator. - /// @param selection Selection for nItr. - void setSelection(Graph::NodeItr nItr, unsigned selection) { - selections[nItr] = selection; - } - - /// \brief Get a node's selection. - /// @param nItr Node iterator. - /// @return The selection for nItr; - unsigned getSelection(Graph::NodeItr nItr) const { - SelectionsMap::const_iterator sItr = selections.find(nItr); - assert(sItr != selections.end() && "No selection for node."); - return sItr->second; - } - - }; - -} - -#endif // LLVM_CODEGEN_PBQP_SOLUTION_H diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index d4df4c548711..5f7cf582c960 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "phielim" -#include "PHIElimination.h" +#include "PHIEliminationUtils.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,23 +34,72 @@ #include <map> using namespace llvm; +namespace { + class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + + public: + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &Fn); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerAtomicPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. 
In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + LiveVariables &LV, MachineLoopInfo *MLI); + + typedef std::pair<unsigned, unsigned> BBVRegPair; + typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; + LoweredPHIMap LoweredPHIs; + }; +} + STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; INITIALIZE_PASS(PHIElimination, "phi-node-elimination", - "Eliminate PHI nodes for register allocation", false, false); + "Eliminate PHI nodes for register allocation", false, false) -char &llvm::PHIEliminationID = PHIElimination::ID; +char& llvm::PHIEliminationID = PHIElimination::ID; -void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { +void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } -bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { +bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); bool Changed = false; @@ -93,14 +142,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) { /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in /// predecessor basic blocks. /// -bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF, +bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) { if (MBB.empty() || !MBB.front().isPHI()) return false; // Quick exit for basic blocks without PHIs. // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin()); + MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); while (MBB.front().isPHI()) LowerAtomicPHINode(MBB, AfterPHIsIt); @@ -121,58 +170,14 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, return true; } -// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg -// when following the CFG edge to SuccMBB. This needs to be after any def of -// SrcReg, but before any subsequent point where control flow might jump out of -// the basic block. -MachineBasicBlock::iterator -llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB, - MachineBasicBlock &SuccMBB, - unsigned SrcReg) { - // Handle the trivial case trivially. - if (MBB.empty()) - return MBB.begin(); - - // Usually, we just want to insert the copy before the first terminator - // instruction. However, for the edge going to a landing pad, we must insert - // the copy before the call/invoke instruction. 
- if (!SuccMBB.isLandingPad()) - return MBB.getFirstTerminator(); - - // Discover any defs/uses in this basic block. - SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *DefUseMI = &*RI; - if (DefUseMI->getParent() == &MBB) - DefUsesInMBB.insert(DefUseMI); - } - MachineBasicBlock::iterator InsertPoint; - if (DefUsesInMBB.empty()) { - // No defs. Insert the copy at the start of the basic block. - InsertPoint = MBB.begin(); - } else if (DefUsesInMBB.size() == 1) { - // Insert the copy immediately after the def/use. - InsertPoint = *DefUsesInMBB.begin(); - ++InsertPoint; - } else { - // Insert the copy immediately after the last def/use. - InsertPoint = MBB.end(); - while (!DefUsesInMBB.count(&*--InsertPoint)) {} - ++InsertPoint; - } - - // Make sure the copy goes after any phi nodes however. - return SkipPHIsAndLabels(MBB, InsertPoint); -} /// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, /// under the assuption that it needs to be lowered in a way that supports /// atomic execution of PHIs. This lowering method is always correct all of the /// time. /// -void llvm::PHIElimination::LowerAtomicPHINode( +void PHIElimination::LowerAtomicPHINode( MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt) { ++NumAtomic; @@ -207,7 +212,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( IncomingReg = entry; reusedIncoming = true; ++NumReused; - DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); + DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi); } else { const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); @@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Find a safe location to insert the copy, this may be the first terminator // in the block (or end()). MachineBasicBlock::iterator InsertPos = - FindCopyInsertPoint(opBlock, MBB, SrcReg); + findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. if (!reusedIncoming && IncomingReg) @@ -335,6 +340,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( #ifndef NDEBUG for (MachineBasicBlock::iterator TI = llvm::next(Term); TI != opBlock.end(); ++TI) { + if (TI->isDebugValue()) + continue; assert(!TI->readsRegister(SrcReg) && "Terminator instructions cannot use virtual registers unless" "they are the first terminator in a block!"); @@ -343,9 +350,13 @@ void llvm::PHIElimination::LowerAtomicPHINode( } else if (reusedIncoming || !IncomingReg) { // We may have to rewind a bit if we didn't insert a copy this time. KillInst = Term; - while (KillInst != opBlock.begin()) - if ((--KillInst)->readsRegister(SrcReg)) + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) break; + } } else { // We just inserted this copy. KillInst = prior(InsertPos); @@ -371,7 +382,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( /// used in a PHI node. We map that to the BB the vreg is coming from. This is /// used later to determine when the vreg is killed in the BB. 
/// -void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { +void PHIElimination::analyzePHINodes(const MachineFunction& MF) { for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); @@ -381,10 +392,10 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) { BBI->getOperand(i).getReg())]; } -bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, - MachineBasicBlock &MBB, - LiveVariables &LV, - MachineLoopInfo *MLI) { +bool PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB, + LiveVariables &LV, + MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. @@ -403,10 +414,14 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) { if (!MLI || !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) && - MLI->isLoopHeader(&MBB))) - Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0; + MLI->isLoopHeader(&MBB))) { + if (PreMBB->SplitCriticalEdge(&MBB, this)) { + Changed = true; + ++NumCriticalEdgesSplit; + } + } } } } - return true; + return Changed; } diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h deleted file mode 100644 index 45a97182e71c..000000000000 --- a/lib/CodeGen/PHIElimination.h +++ /dev/null @@ -1,115 +0,0 @@ -//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP -#define LLVM_CODEGEN_PHIELIMINATION_HPP - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -namespace llvm { - class LiveVariables; - class MachineRegisterInfo; - class MachineLoopInfo; - - /// Lower PHI instructions to copies. - class PHIElimination : public MachineFunctionPass { - MachineRegisterInfo *MRI; // Machine register information - - public: - static char ID; // Pass identification, replacement for typeid - PHIElimination() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - - private: - /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions - /// in predecessor basic blocks. - /// - bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); - - /// analyzePHINodes - Gather information about the PHI nodes in - /// here. In particular, we want to map the number of uses of a virtual - /// register which is used in a PHI node. We map that to the BB the - /// vreg is coming from. This is used later to determine when the vreg - /// is killed in the BB. - /// - void analyzePHINodes(const MachineFunction& Fn); - - /// Split critical edges where necessary for good coalescer performance. - bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI); - - /// SplitCriticalEdge - Split a critical edge from A to B by - /// inserting a new MBB. 
Update branches in A and PHI instructions - /// in B. Return the new block. - MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A, - MachineBasicBlock *B); - - /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from - /// SrcReg when following the CFG edge to SuccMBB. This needs to be after - /// any def of SrcReg, but before any subsequent point where control flow - /// might jump out of the basic block. - MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB, - MachineBasicBlock &SuccMBB, - unsigned SrcReg); - - // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and - // also after any exception handling labels: in landing pads execution - // starts at the label, so any copies placed before it won't be executed! - // We also deal with DBG_VALUEs, which are a bit tricky: - // PHI - // DBG_VALUE - // LABEL - // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it - // needs to be annulled or, better, moved to follow the label, as well. - // PHI - // DBG_VALUE - // no label - // Here it is not a good idea to skip the DBG_VALUE. - // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and - // maximally stupid. - MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - // Rather than assuming that EH labels come before other kinds of labels, - // just skip all labels. - while (I != MBB.end() && - (I->isPHI() || I->isLabel() || I->isDebugValue())) { - if (I->isDebugValue() && I->getNumOperands()==3 && - I->getOperand(0).isReg()) - I->getOperand(0).setReg(0U); - ++I; - } - return I; - } - - typedef std::pair<unsigned, unsigned> BBVRegPair; - typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; - - VRegPHIUse VRegPHIUseCount; - - // Defs of PHI sources which are implicit_def. - SmallPtrSet<MachineInstr*, 4> ImpDefs; - - // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, - MachineInstrExpressionTrait> LoweredPHIMap; - LoweredPHIMap LoweredPHIs; - }; - -} - -#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */ diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp new file mode 100644 index 000000000000..10bfdcce6769 --- /dev/null +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -0,0 +1,61 @@ +//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PHIEliminationUtils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg +// when following the CFG edge to SuccMBB. This needs to be after any def of +// SrcReg, but before any subsequent point where control flow might jump out of +// the basic block. +MachineBasicBlock::iterator +llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg) { + // Handle the trivial case trivially. + if (MBB->empty()) + return MBB->begin(); + + // Usually, we just want to insert the copy before the first terminator + // instruction. 
However, for the edge going to a landing pad, we must insert + // the copy before the call/invoke instruction. + if (!SuccMBB->isLandingPad()) + return MBB->getFirstTerminator(); + + // Discover any defs/uses in this basic block. + SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; + MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg), + RE = MRI.reg_end(); RI != RE; ++RI) { + MachineInstr* DefUseMI = &*RI; + if (DefUseMI->getParent() == MBB) + DefUsesInMBB.insert(DefUseMI); + } + + MachineBasicBlock::iterator InsertPoint; + if (DefUsesInMBB.empty()) { + // No defs. Insert the copy at the start of the basic block. + InsertPoint = MBB->begin(); + } else if (DefUsesInMBB.size() == 1) { + // Insert the copy immediately after the def/use. + InsertPoint = *DefUsesInMBB.begin(); + ++InsertPoint; + } else { + // Insert the copy immediately after the last def/use. + InsertPoint = MBB->end(); + while (!DefUsesInMBB.count(&*--InsertPoint)) {} + ++InsertPoint; + } + + // Make sure the copy goes after any phi nodes however. + return MBB->SkipPHIsAndLabels(InsertPoint); +} diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h new file mode 100644 index 000000000000..9ac47fb4c505 --- /dev/null +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -0,0 +1,25 @@ +//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H + +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { + /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. 
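The findPHICopyInsertPoint utility above picks the position for the copy that replaces a PHI source: on a normal edge it goes just before the predecessor's first terminator, but on an edge into a landing pad it must instead go after the last def or use of the source register in that block (or at the start of the block if there is none), and in any case after leading PHIs and labels. A simplified standalone model of that placement rule over a flat instruction list (the Instr struct and its flags are invented for illustration, not LLVM types):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// A toy stand-in for a MachineInstr: just enough flags to model the
// insertion-point rules used by findPHICopyInsertPoint.
struct Instr {
  std::string name;
  bool isPHI = false;
  bool isLabel = false;
  bool isTerminator = false;
  bool touchesSrcReg = false; // def or use of the register being copied
};

// Returns the index at which the PHI copy should be inserted.
static std::size_t findCopyInsertPoint(const std::vector<Instr> &block,
                                       bool succIsLandingPad) {
  if (block.empty())
    return 0;

  if (!succIsLandingPad) {
    // Normal edge: insert before the first terminator.
    for (std::size_t i = 0; i < block.size(); ++i)
      if (block[i].isTerminator)
        return i;
    return block.size();
  }

  // Landing-pad edge: insert after the last def/use of the source register,
  // or at the start of the block if there is none.
  std::size_t point = 0;
  for (std::size_t i = 0; i < block.size(); ++i)
    if (block[i].touchesSrcReg)
      point = i + 1;

  // Never insert before leading PHIs or labels.
  while (point < block.size() &&
         (block[point].isPHI || block[point].isLabel))
    ++point;
  return point;
}

int main() {
  std::vector<Instr> block = {
      {"PHI", true},
      {"label", false, true},
      {"def %src", false, false, false, true},
      {"add", false},
      {"invoke-like call", false, false, true}};

  std::cout << "normal edge: insert at index "
            << findCopyInsertPoint(block, false) << "\n"; // 4, before the terminator
  std::cout << "landing-pad edge: insert at index "
            << findCopyInsertPoint(block, true) << "\n"; // 3, after 'def %src'
}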
+ MachineBasicBlock::iterator + findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg); +} + +#endif diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 17cee46ca16c..5d7123caa017 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -41,7 +41,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -50,8 +52,13 @@ static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization")); +static cl::opt<bool> +DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), + cl::desc("Disable the peephole optimizer")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate foled"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -62,7 +69,9 @@ namespace { public: static char ID; // Pass identification - PeepholeOptimizer() : MachineFunctionPass(ID) {} + PeepholeOptimizer() : MachineFunctionPass(ID) { + initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -79,12 +88,21 @@ namespace { bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); }; } char PeepholeOptimizer::ID = 0; -INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts", - "Peephole Optimizations", false, false); +INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) FunctionPass *llvm::createPeepholeOptimizerPass() { return new PeepholeOptimizer(); @@ -102,12 +120,10 @@ FunctionPass *llvm::createPeepholeOptimizerPass() { bool PeepholeOptimizer:: OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs) { - LocalMIs.insert(MI); - unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; - + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; @@ -232,22 +248,17 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB) { + MachineBasicBlock *MBB){ // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. 
unsigned SrcReg; - int CmpValue; - if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) || - TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0) - return false; - - MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); - if (llvm::next(DI) != MRI->def_end()) - // Only support one definition. + int CmpMask, CmpValue; + if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; - // Attempt to convert the defining instruction to set the "zero" flag. - if (TII->ConvertToSetZeroFlag(&*DI, MI)) { + // Attempt to optimize the comparison instruction. + if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { ++NumEliminated; return true; } @@ -255,7 +266,53 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, return false; } +bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isMoveImmediate()) + return false; + if (TID.getNumDefs() != 1) + return false; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefRegs.insert(Reg); + return true; + } + + return false; +} + +/// FoldImmediate - Try folding register operands that are defined by move +/// immediate instructions, i.e. a trivial constant folding optimization, if +/// and only if the def and use are in the same BB. +bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (ImmDefRegs.count(Reg) == 0) + continue; + DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end()); + if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } + } + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePeephole) + return false; + TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); @@ -264,22 +321,50 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; + + bool SeenMoveImm = false; LocalMIs.clear(); + ImmDefRegs.clear(); + ImmDefMIs.clear(); + bool First = true; + MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator - MII = I->begin(), ME = I->end(); MII != ME; ) { + MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + LocalMIs.insert(MI); - if (MI->getDesc().isCompare() && - !MI->getDesc().hasUnmodeledSideEffects()) { - ++MII; // The iterator may become invalid if the compare is deleted. - Changed |= OptimizeCmpInstr(MI, MBB); + if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || + MI->hasUnmodeledSideEffects()) { + ++MII; + continue; + } + + if (MI->getDesc().isCompare()) { + if (OptimizeCmpInstr(MI, MBB)) { + // MI is deleted. 
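OptimizeCmpInstr above now delegates to the target hook OptimizeCompareInstr, whose typical job is to prove that the instruction defining the compared register already sets the condition flags the compare would set, so the compare itself can be erased. A toy, target-independent model of that idea (the mini-ISA, its flag-setting rules, and eliminateRedundantCmp are invented for illustration and are not how any particular target implements the hook):

#include <iostream>
#include <string>
#include <vector>

// Toy instruction stream. In this mini-ISA, "subs" and "ands" set the zero
// flag as a side effect; "cmp0 r" only compares r against zero.
struct ToyInst {
  std::string op;
  std::string dst; // defined register ("" for cmp0)
  std::string src; // compared register, only used by cmp0
};

// Returns true (and removes the compare) if the register compared against
// zero is defined, within the same block, by a flag-setting instruction.
static bool eliminateRedundantCmp(std::vector<ToyInst> &block,
                                  std::size_t cmpIdx) {
  const std::string &reg = block[cmpIdx].src;
  for (std::size_t i = cmpIdx; i-- > 0;) {
    if (block[i].dst != reg)
      continue;
    if (block[i].op == "subs" || block[i].op == "ands") {
      block.erase(block.begin() + cmpIdx);
      return true;
    }
    return false; // defining instruction does not set flags
  }
  return false; // no def found in this block
}

int main() {
  std::vector<ToyInst> block = {{"subs", "r1", ""},
                                {"mov", "r2", ""},
                                {"cmp0", "", "r1"}};
  std::cout << "compare eliminated: " << eliminateRedundantCmp(block, 2)
            << "\n"; // 1 -- r1 was defined by the flag-setting "subs"
}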
+ Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } + + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; } else { Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); - ++MII; + if (SeenMoveImm) + Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } + + First = false; + PMII = MII; + ++MII; } } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f0bd6d1372be..60c24b710792 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -133,18 +133,12 @@ namespace { std::vector<unsigned> KillIndices; public: - SchedulePostRATDList(MachineFunction &MF, - const MachineLoopInfo &MLI, - const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR, - AntiDepBreaker *ADB, - AliasAnalysis *aa) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa), - KillIndices(TRI->getNumRegs()) {} - - ~SchedulePostRATDList() { - } + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + + ~SchedulePostRATDList(); /// StartBlock - Initialize register live-range state for scheduling in /// this block. @@ -183,9 +177,34 @@ namespace { }; } +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), + KillIndices(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + AntiDepBreak = + ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - AA = &getAnalysis<AliasAnalysis>(); TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; } else { // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. 
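The new isMoveImmediate/FoldImmediate pair in the peephole pass above performs a very local constant propagation: while scanning a block it remembers virtual registers defined by move-immediate instructions and, once one has been seen, asks the target to rewrite later uses of those registers in the same block into immediate operands. A target-independent sketch of the same bookkeeping over a toy instruction stream (the Inst/Operand representation and foldImmediates are invented for illustration; the real pass defers the actual rewrite to TargetInstrInfo::FoldImmediate):

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

// Toy instruction: either a move-immediate "dst = imm" or an ordinary
// instruction whose sources are registers or already-folded immediates.
struct Operand {
  std::string reg;          // empty once folded to an immediate
  std::optional<int> imm;
};

struct Inst {
  bool isMoveImm = false;
  std::string dst;
  int imm = 0;              // only meaningful for move-immediate
  std::vector<Operand> srcs;
};

// Mirrors the pass structure: record move-immediate defs, then rewrite later
// uses in the same block into immediate operands.
static unsigned foldImmediates(std::vector<Inst> &block) {
  std::map<std::string, int> immDefs; // reg -> immediate value
  unsigned numFolded = 0;
  for (Inst &I : block) {
    if (I.isMoveImm) {
      immDefs[I.dst] = I.imm;
      continue;
    }
    for (Operand &Op : I.srcs) {
      auto It = immDefs.find(Op.reg);
      if (Op.imm || It == immDefs.end())
        continue;
      Op.imm = It->second; // fold: the register use becomes an immediate
      Op.reg.clear();
      ++numFolded;
    }
    // A redefinition of a register invalidates any recorded immediate.
    immDefs.erase(I.dst);
  }
  return numFolded;
}

int main() {
  std::vector<Inst> block = {
      {true, "v1", 42, {}},                        // v1 = 42
      {false, "v2", 0, {{"v1", {}}, {"v0", {}}}},  // v2 = v1 + v0
  };
  std::cout << "folded operands: " << foldImmediates(block) << "\n"; // 1
}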
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; @@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - ScheduleHazardRecognizer *HR = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); - AntiDepBreaker *ADB = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); - - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + CriticalPathRCs); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } - delete HR; - delete ADB; - return true; } @@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; - LatencyPriorityQueue q = AvailableQueue; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(this); - }); + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); SUnit *FoundSUnit = 0; bool HasNoopHazards = false; @@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue.pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index cd9d83eeb684..d6e31dae9d13 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -91,8 +91,9 @@ namespace { public: static char ID; - PreAllocSplitting() - : MachineFunctionPass(ID) {} + PreAllocSplitting() : MachineFunctionPass(ID) { + initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -106,10 +107,8 @@ namespace { AU.addPreserved<LiveStacks>(); AU.addPreserved<RegisterCoalescer>(); AU.addPreserved<CalculateSpillWeights>(); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); @@ -203,9 +202,18 @@ namespace { char PreAllocSplitting::ID = 0; -INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting", +INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting", + "Pre-Register Allocation Live Interval Splitting", + false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting", - false, false); + false, false) char &llvm::PreAllocSplittingID = PreAllocSplitting::ID; @@ -324,7 +332,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false, + CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, LSs->getVNInfoAllocator()); return SS; } @@ -585,7 +593,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us SlotIndex StartIndex = LIs->getMBBStartIdx(MBB); VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false, + LI->getNextValue(SlotIndex(), /*FIXME*/ 0, LIs->getVNInfoAllocator()); if (!IsIntraBlock) LiveOut[MBB] = RetVNI; @@ -674,7 +682,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DefIdx = DefIdx.getDefIndex(); assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting."); - VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc); + VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc); // If the def is a move, set the copy field. if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) @@ -807,7 +815,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) + if (!DefMI || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); @@ -872,7 +880,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false, + CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, LSs->getVNInfoAllocator()); } @@ -967,8 +975,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); - MachineInstr *DefMI = ValNo->isDefAccurate() - ? LIs->getInstructionFromIndex(ValNo->def) : NULL; + MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def); // If this would create a new join point, do not split. if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) { @@ -1005,7 +1012,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { SlotIndex SpillIndex; MachineInstr *SpillMI = NULL; int SS = -1; - if (!ValNo->isDefAccurate()) { + if (!DefMI) { // If we don't know where the def is we must split just before the barrier. if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier, BarrierMBB, SS, RefsInMBB))) { @@ -1199,12 +1206,12 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { // We also don't try to handle the results of PHI joins, since there's // no defining instruction to analyze. - if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue; + MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); + if (!DefMI || CurrVN->isUnused()) continue; // We're only interested in eliminating cruft introduced by the splitter, // is of the form load-use or load-use-store. First, check that the // definition is a load, and remember what stack slot we loaded it from. 
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def); int FrameIndex; if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index b8831db1d118..9cd9941e56b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -26,8 +26,11 @@ using namespace llvm; char ProcessImplicitDefs::ID = 0; -INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs", - "Process Implicit Definitions.", false, false); +INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e2802c1fdf4a..ad7b6e4aa97f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -21,6 +21,7 @@ #define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" +#include "llvm/InlineAsm.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -29,7 +30,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -44,8 +45,12 @@ using namespace llvm; char PEI::ID = 0; -INITIALIZE_PASS(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false); +INITIALIZE_PASS_BEGIN(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); @@ -61,6 +66,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); @@ -71,7 +78,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); + TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); // Scan the function for modified callee saved registers and insert spill code // for any callee saved registers that are modified. @@ -91,7 +98,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TRI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn); // Calculate actual frame offsets for all abstract stack objects... 
calculateFrameObjectOffsets(Fn); @@ -138,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; @@ -165,7 +173,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { // Some inline asm's need a stack frame, as indicated by operand 1. - if (I->getOperand(1).getImm()) + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) AdjustsStack = true; } @@ -180,7 +189,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // the target doesn't indicate otherwise, remove the call frame pseudos // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. - if (RegInfo->canSimplifyCallFramePseudos(Fn)) + if (TFI->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -190,7 +199,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); // Get the callee saved register list... @@ -229,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const TargetFrameInfo::SpillSlot *FixedSpillSlots = + const TargetFrameLowering::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate @@ -247,7 +256,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && FixedSlot->Reg != Reg) ++FixedSlot; @@ -290,13 +299,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { return; const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; if (! ShrinkWrapThisFunction) { // Spill using target interface. I = EntryBlock->begin(); - if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. @@ -328,7 +338,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. 
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -480,10 +490,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); bool StackGrowsDown = - TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -549,7 +559,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -631,17 +641,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); } - if (!RegInfo->targetHandlesStackFrameRounding()) { + if (!TFI.targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has @@ -672,16 +682,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { /// prolog and epilog code to the function. /// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); // Add prologue to the function... 
- TRI->emitPrologue(Fn); + TFI.emitPrologue(Fn); // Add epilogue to restore the callee-save registers in each exiting block for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue if (!I->empty() && I->back().getDesc().isReturn()) - TRI->emitEpilogue(Fn, *I); + TFI.emitEpilogue(Fn, *I); } } @@ -694,9 +704,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetFrameInfo *TFI = TM.getFrameInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); bool StackGrowsDown = - TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown; + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); @@ -755,8 +765,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, - FrameIndexVirtualScavenging ? NULL : RS); + TRI.eliminateFrameIndex(MI, SPAdj, + FrameIndexVirtualScavenging ? NULL : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -825,7 +835,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { ScratchReg = RS->scavengeRegister(RC, I, SPAdj); ++NumScavengedRegs; } - // replace this reference to the virtual register with the + // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); MI->getOperand(i).setReg(ScratchReg); diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index d575124a6b3e..e2391591ad06 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -36,7 +36,9 @@ namespace llvm { class PEI : public MachineFunctionPass { public: static char ID; - PEI() : MachineFunctionPass(ID) {} + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } const char *getPassName() const { return "Prolog/Epilog Insertion & Frame Finalization"; diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 5e86e5a9447e..73b66d868f3d 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -18,7 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include <map> using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h new file mode 100644 index 000000000000..8c7e5f53b824 --- /dev/null +++ b/lib/CodeGen/RegAllocBase.h @@ -0,0 +1,181 @@ +//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class, which is the skeleton of a basic +// register allocation algorithm and interface for extending it. 
It provides the +// building blocks on which to construct other experimental allocators and test +// the validity of two principles: +// +// - If virtual and physical register liveness is modeled using intervals, then +// on-the-fly interference checking is cheap. Furthermore, interferences can be +// lazily cached and reused. +// +// - Register allocation complexity, and generated code performance is +// determined by the effectiveness of live range splitting rather than optimal +// coloring. +// +// Following the first principle, interfering checking revolves around the +// LiveIntervalUnion data structure. +// +// To fulfill the second principle, the basic allocator provides a driver for +// incremental splitting. It essentially punts on the problem of register +// coloring, instead driving the assignment of virtual to physical registers by +// the cost of splitting. The basic allocator allows for heuristic reassignment +// of registers, if a more sophisticated allocator chooses to do that. +// +// This framework provides a way to engineer the compile time vs. code +// quality trade-off without relying on a particular theoretical solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCBASE +#define LLVM_CODEGEN_REGALLOCBASE + +#include "llvm/ADT/OwningPtr.h" +#include "LiveIntervalUnion.h" +#include <queue> + +namespace llvm { + +template<typename T> class SmallVectorImpl; +class TargetRegisterInfo; +class VirtRegMap; +class LiveIntervals; +class Spiller; + +// Forward declare a priority queue of live virtual registers. If an +// implementation needs to prioritize by anything other than spill weight, then +// this will become an abstract base class with virtual calls to push/get. +class LiveVirtRegQueue; + +/// RegAllocBase provides the register allocation driver and interface that can +/// be extended to add interesting heuristics. +/// +/// Register allocators must override the selectOrSplit() method to implement +/// live range splitting. They may also override getPriority() which otherwise +/// defaults to the spill weight computed by CalculateSpillWeights. +class RegAllocBase { + LiveIntervalUnion::Allocator UnionAllocator; +protected: + // Array of LiveIntervalUnions indexed by physical register. + class LiveUnionArray { + unsigned NumRegs; + LiveIntervalUnion *Array; + public: + LiveUnionArray(): NumRegs(0), Array(0) {} + ~LiveUnionArray() { clear(); } + + unsigned numRegs() const { return NumRegs; } + + void init(LiveIntervalUnion::Allocator &, unsigned NRegs); + + void clear(); + + LiveIntervalUnion& operator[](unsigned PhysReg) { + assert(PhysReg < NumRegs && "physReg out of bounds"); + return Array[PhysReg]; + } + }; + + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + LiveUnionArray PhysReg2LiveUnion; + + // Current queries, one per physreg. They must be reinitialized each time we + // query on a new live virtual register. + OwningArrayPtr<LiveIntervalUnion::Query> Queries; + + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + + virtual ~RegAllocBase() {} + + // A RegAlloc pass should call this before allocatePhysRegs. + void init(VirtRegMap &vrm, LiveIntervals &lis); + + // Get an initialized query to check interferences between lvr and preg. Note + // that Query::init must be called at least once for each physical register + // before querying a new live virtual register. This ties Queries and + // PhysReg2LiveUnion together. 
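The principle stated in the header above, that interval-based liveness makes on-the-fly interference checking cheap, comes down to overlap tests between sorted segment lists. The following is an illustrative sketch with simplified stand-in types, not the LiveIntervalUnion/Query API from this commit: one sorted, disjoint segment list per physical register, and a query that walks a candidate virtual register's segments against it.

#include <algorithm>
#include <vector>

// Simplified stand-ins, for illustration only.
struct Segment { unsigned Start, End; };              // half-open [Start, End)

// All segments already assigned to one physical register, sorted and disjoint.
struct SimpleLiveUnion {
  std::vector<Segment> Segs;

  // True if any segment of VirtReg overlaps a segment already in the union.
  bool checkInterference(const std::vector<Segment> &VirtReg) const {
    for (const Segment &S : VirtReg) {
      // First union segment that does not end before S begins.
      auto It = std::lower_bound(Segs.begin(), Segs.end(), S,
                                 [](const Segment &A, const Segment &B) {
                                   return A.End <= B.Start;
                                 });
      if (It != Segs.end() && It->Start < S.End)
        return true;                                  // overlap found
    }
    return false;
  }
};

Each query amounts to a handful of binary searches, which is why the class keeps one cached Query per physical register instead of recomputing interference from scratch.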
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { + Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + return Queries[PhysReg]; + } + + // The top-level driver. The output is a VirtRegMap that us updated with + // physical register assignments. + // + // If an implementation wants to override the LiveInterval comparator, we + // should modify this interface to allow passing in an instance derived from + // LiveVirtRegQueue. + void allocatePhysRegs(); + + // Get a temporary reference to a Spiller instance. + virtual Spiller &spiller() = 0; + + // getPriority - Calculate the allocation priority for VirtReg. + // Virtual registers with higher priorities are allocated first. + virtual float getPriority(LiveInterval *LI) = 0; + + // A RegAlloc pass should override this to provide the allocation heuristics. + // Each call must guarantee forward progess by returning an available PhysReg + // or new set of split live virtual registers. It is up to the splitter to + // converge quickly toward fully spilled live ranges. + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &splitLVRs) = 0; + + // A RegAlloc pass should call this when PassManager releases its memory. + virtual void releaseMemory(); + + // Helper for checking interference between a live virtual register and a + // physical register, including all its register aliases. If an interference + // exists, return the interfering register, which may be preg or an alias. + unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); + + /// assign - Assign VirtReg to PhysReg. + /// This should not be called from selectOrSplit for the current register. + void assign(LiveInterval &VirtReg, unsigned PhysReg); + + /// unassign - Undo a previous assignment of VirtReg to PhysReg. + /// This can be invoked from selectOrSplit, but be careful to guarantee that + /// allocation is making progress. + void unassign(LiveInterval &VirtReg, unsigned PhysReg); + + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. + bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + /// addMBBLiveIns - Add physreg liveins to basic blocks. + void addMBBLiveIns(MachineFunction *); + +#ifndef NDEBUG + // Verify each LiveIntervalUnion. + void verify(); +#endif + + // Use this group name for NamedRegionTimer. + static const char *TimerGroupName; + +public: + /// VerifyEnabled - True when -verify-regalloc is given. + static bool VerifyEnabled; + +private: + void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); +}; + +} // end namespace llvm + +#endif // !defined(LLVM_CODEGEN_REGALLOCBASE) diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp new file mode 100644 index 000000000000..045c8db9dadb --- /dev/null +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -0,0 +1,523 @@ +//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
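Before the RegAllocBasic.cpp listing continues, it is worth spelling out the selectOrSplit() contract described in the header above: each call must either return an available physical register, or return 0 after appending newly split or spilled live ranges, so the driver's queue always makes progress. A hypothetical minimal subclass, shown only to illustrate that contract (pass boilerplate and registration omitted; this is not part of the commit):

// Illustrative sketch: an allocator that never assigns a register and always
// spills, which still satisfies the forward-progress contract.
class RAAlwaysSpill : public RegAllocBase {
  std::auto_ptr<Spiller> SpillerInstance;
public:
  virtual Spiller &spiller() { return *SpillerInstance; }
  virtual float getPriority(LiveInterval *LI) { return LI->weight; }

  virtual unsigned selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<LiveInterval*> &SplitVRegs) {
    // Spill the requesting register itself. The spiller appends whatever new
    // intervals it creates to SplitVRegs so allocatePhysRegs() can re-queue
    // them; returning 0 tells the driver not to assign anything this round.
    SmallVector<LiveInterval*, 1> PendingSpills;
    spiller().spill(&VirtReg, SplitVRegs, PendingSpills);
    return 0;
  }
};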
+// +//===----------------------------------------------------------------------===// +// +// This file defines the RABasic function pass, which provides a minimal +// implementation of the basic register allocator. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveIntervalUnion.h" +#include "RegAllocBase.h" +#include "RenderMachineFunction.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Function.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +#include <cstdlib> + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", + createBasicRegisterAllocator); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +namespace { +/// RABasic provides a minimal implementation of the basic register allocation +/// algorithm. It prioritizes live virtual registers by spill weight and spills +/// whenever a register is unavailable. This is not practical in production but +/// provides a useful baseline both for measuring other allocators and comparing +/// the speed of the basic algorithm against other styles of allocators. +class RABasic : public MachineFunctionPass, public RegAllocBase +{ + // context + MachineFunction *MF; + BitVector ReservedRegs; + + // analyses + LiveStacks *LS; + RenderMachineFunction *RMF; + + // state + std::auto_ptr<Spiller> SpillerInstance; + +public: + RABasic(); + + /// Return the pass name. + virtual const char* getPassName() const { + return "Basic Register Allocator"; + } + + /// RABasic analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + virtual Spiller &spiller() { return *SpillerInstance; } + + virtual float getPriority(LiveInterval *LI) { return LI->weight; } + + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs); + + /// Perform register allocation. 
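As a usage aside: the static RegisterRegAlloc hook registered above is what exposes the pass by name. Assuming the standard llc option for choosing a register allocator, the two allocators added by this commit would be selected with something like:

llc -regalloc=basic input.ll
llc -regalloc=greedy input.ll

The option spelling comes from the usual RegisterRegAlloc mechanism, not from anything introduced in this diff.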
+ virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; +}; + +char RABasic::ID = 0; + +} // end anonymous namespace + +RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); +} + +void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + DEBUG(AU.addRequired<RenderMachineFunction>()); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RABasic::releaseMemory() { + SpillerInstance.reset(0); + RegAllocBase::releaseMemory(); +} + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. +void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr<LiveVirtRegBitSet> + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? + unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. 
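The LiveUnionArray::init() and clear() implementations that follow construct the per-physreg unions in place over raw storage, because LiveIntervalUnion takes constructor arguments and therefore cannot be created with a plain array new. A self-contained illustration of that idiom, using an invented Widget type rather than LLVM classes:

#include <cstdlib>
#include <new>

struct Widget {
  unsigned Id;
  explicit Widget(unsigned Id) : Id(Id) {}     // no default constructor
};

// Construct N Widgets in place over malloc'ed storage, the same pattern
// LiveUnionArray::init() uses for LiveIntervalUnion (error handling omitted).
Widget *makeArray(unsigned N) {
  Widget *A = static_cast<Widget*>(std::malloc(sizeof(Widget) * N));
  for (unsigned i = 0; i != N; ++i)
    new (A + i) Widget(i);                      // placement new
  return A;
}

// Tear down symmetrically: run each destructor, then release the storage,
// mirroring LiveUnionArray::clear().
void destroyArray(Widget *A, unsigned N) {
  for (unsigned i = 0; i != N; ++i)
    A[i].~Widget();
  std::free(A);
}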
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + PhysReg2LiveUnion.clear(); +} + +// Visit all the live virtual registers. If they are already assigned to a +// physical register, unify them with the corresponding LiveIntervalUnion, +// otherwise push them on the priority queue for later assignment. +void RegAllocBase:: +seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) { + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum)); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + + // Push each vreg onto a queue or "precolor" by adding it to a physreg union. + std::priority_queue<std::pair<float, unsigned> > VirtRegQ; + seedLiveVirtRegs(VirtRegQ); + + // Continue assigning vregs one at a time to available physical registers. + while (!VirtRegQ.empty()) { + // Pop the highest priority vreg. + LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second); + VirtRegQ.pop(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. 
+ DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName() + << ':' << VirtReg << '\n'); + typedef SmallVector<LiveInterval*, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs); + + if (AvailablePhysReg) + assign(VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval* SplitVirtReg = *I; + if (SplitVirtReg->empty()) continue; + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg), + SplitVirtReg->reg)); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Helper for spillInteferences() that spills all interfering vregs currently +// assigned to this physical register. +void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); + assert(Q.seenAllInterferences() && "need collectInterferences()"); + const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs(); + + for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(), + E = PendingSpills.end(); I != E; ++I) { + LiveInterval &SpilledVReg = **I; + DEBUG(dbgs() << "extracting from " << + TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + unassign(SpilledVReg, PhysReg); + + // Spill the extracted interval. + spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + } + // After extracting segments, the query's results are invalid. But keep the + // contents valid until we're done accessing pendingSpills. + Q.clear(); +} + +// Spill or split all live virtual registers currently unified under PhysReg +// that interfere with VirtReg. The newly spilled or split live intervals are +// returned by appending them to SplitVRegs. +bool +RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Record each interference and determine if all are spillable before mutating + // either the union or live intervals. + unsigned NumInterferences = 0; + // Collect interferences assigned to any alias of the physical register. + for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); + NumInterferences += QAlias.collectInterferingVRegs(); + if (QAlias.seenUnspillableVReg()) { + return false; + } + } + DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << + " interferences with " << VirtReg << "\n"); + assert(NumInterferences > 0 && "expect interference"); + + // Spill each interfering vreg allocated to PhysReg or an alias. 
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + spillReg(VirtReg, *AliasI, SplitVRegs); + return true; +} + +// Add newly allocated physical registers to the MBB live in sets. +void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + typedef SmallVector<MachineBasicBlock*, 8> MBBVec; + MBBVec liveInMBBs; + MachineBasicBlock &entryMBB = *MF->begin(); + + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); + ++SI) { + + // Find the set of basic blocks which this range is live into... + liveInMBBs.clear(); + if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; + + // And add the physreg for this interval to their live-in sets. + for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); + I != E; ++I) { + MachineBasicBlock *MBB = *I; + if (MBB == &entryMBB) continue; + if (MBB->isLiveIn(PhysReg)) continue; + MBB->addLiveIn(PhysReg); + } + } + } +} + + +//===----------------------------------------------------------------------===// +// RABasic Implementation +//===----------------------------------------------------------------------===// + +// Driver for the register assignment and splitting heuristics. +// Manages iteration over the LiveIntervalUnions. +// +// This is a minimal implementation of register assignment and splitting that +// spills whenever we run out of registers. +// +// selectOrSplit can only be called once per live virtual register. We then do a +// single interference test for each register the correct class until we find an +// available register. So, the number of interference tests in the worst case is +// |vregs| * |machineregs|. And since the number of interference tests is +// minimal, there is no value in caching them outside the scope of +// selectOrSplit(). +unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &SplitVRegs) { + // Populate a list of physical register spill candidates. + SmallVector<unsigned, 8> PhysRegSpillCands; + + // Check for an available register in this class. + const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg); + + for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF), + E = TRC->allocation_order_end(*MF); + I != E; ++I) { + + unsigned PhysReg = *I; + if (ReservedRegs.test(PhysReg)) continue; + + // Check interference and as a side effect, intialize queries for this + // VirtReg and its aliases. + unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); + if (interfReg == 0) { + // Found an available register. + return PhysReg; + } + LiveInterval *interferingVirtReg = + Queries[interfReg].firstInterference().liveUnionPos().value(); + + // The current VirtReg must either be spillable, or one of its interferences + // must have less spill weight. + if (interferingVirtReg->weight < VirtReg.weight ) { + PhysRegSpillCands.push_back(PhysReg); + } + } + // Try to spill another interfering reg with less spill weight. 
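To make the heuristic concrete before the eviction loop that follows: RABasic walks the allocation order once, takes the first interference-free register, and otherwise remembers registers whose queried interference is lighter than the register being allocated. A toy model of that decision rule, with invented types and no connection to the real TargetRegisterClass iteration:

#include <vector>

// One candidate physreg in allocation order. OccupantWeight < 0 means the
// register is free; otherwise it is the spill weight of the occupying vreg.
struct Candidate { unsigned PhysReg; float OccupantWeight; };

// Return the physreg to use, or 0 meaning "spill the requesting vreg itself".
unsigned pickPhysReg(float VirtRegWeight, const std::vector<Candidate> &Order) {
  std::vector<unsigned> SpillCands;
  for (const Candidate &C : Order) {
    if (C.OccupantWeight < 0)
      return C.PhysReg;                        // free register: done
    if (C.OccupantWeight < VirtRegWeight)
      SpillCands.push_back(C.PhysReg);         // cheaper to evict the occupant
  }
  // RABasic then tries to spill the occupants of each candidate in turn; here
  // we just report the first one, and 0 if every occupant was heavier.
  return SpillCands.empty() ? 0 : SpillCands.front();
}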
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + + assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + "Interference after spill."); + // Tell the caller to allocate to this newly freed physical register. + return *PhysRegI; + } + // No other spill candidates were found, so spill the current VirtReg. + DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); + SmallVector<LiveInterval*, 1> pendingSpills; + + spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RABasic::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + DEBUG(RMF = &getAnalysis<RenderMachineFunction>()); + + RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + + ReservedRegs = TRI->getReservedRegs(*MF); + + SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + + allocatePhysRegs(); + + addMBBLiveIns(MF); + + // Diagnostic output before rewriting + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); + + // optional HTML output + DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); + + // FIXME: Verification currently must run before VirtRegRewriter. We should + // make the rewriter a separate pass and override verifyAnalysis instead. When + // that happens, verification naturally falls under VerifyMachineCode. +#ifndef NDEBUG + if (VerifyEnabled) { + // Verify accuracy of LiveIntervals. The standard machine code verifier + // ensures that each LiveIntervals covers all uses of the virtual reg. + + // FIXME: MachineVerifier is badly broken when using the standard + // spiller. Always use -spiller=inline with -verify-regalloc. Even with the + // inline spiller, some tests fail to verify because the coalescer does not + // always generate verifiable code. + MF->verify(this, "In RABasic::verify"); + + // Verify that LiveIntervals are partitioned into unions and disjoint within + // the unions. + verify(); + } +#endif // !NDEBUG + + // Run rewriter + VRM->rewrite(LIS->getSlotIndexes()); + + // The pass output is in VirtRegMap. Release all the transient data. + releaseMemory(); + + return true; +} + +FunctionPass* llvm::createBasicRegisterAllocator() +{ + return new RABasic(); +} diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fc150d55e226..15036e38b893 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -48,7 +48,10 @@ namespace { public: static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), - isBulkSpilling(false) {} + isBulkSpilling(false) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } private: const TargetMachine *TM; MachineFunction *MF; @@ -259,8 +262,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // instruction, not on the spill. 
bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling %reg" << LRI->first - << " in " << TRI->getName(LR.PhysReg)); + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + << " in " << PrintReg(LR.PhysReg, TRI)); const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); int FI = getStackSpaceFor(LRI->first, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); @@ -331,7 +334,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { MO.setIsKill(); return; default: - // The physreg was allocated to a virtual register. That means to value we + // The physreg was allocated to a virtual register. That means the value we // wanted has been clobbered. llvm_unreachable("Instruction uses an allocated register"); } @@ -458,8 +461,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { /// register must not be used for anything else when this is called. /// void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to " - << TRI->getName(PhysReg) << "\n"); + DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " + << PrintReg(PhysReg, TRI) << "\n"); PhysRegState[PhysReg] = LRE.first; assert(!LRE.second.PhysReg && "Already assigned a physreg"); LRE.second.PhysReg = PhysReg; @@ -503,8 +506,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { return assignVirtToPhysReg(LRE, PhysReg); } - DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName() - << "\n"); + DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " + << RC->getName() << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { @@ -584,8 +587,8 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, allocVirtReg(MI, *LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " - << TRI->getName(LR.PhysReg) << "\n"); + DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " + << PrintReg(LR.PhysReg, TRI) << "\n"); TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } else if (LR.Dirty) { @@ -653,11 +656,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { if (ThroughRegs.insert(Reg)) - DEBUG(dbgs() << " %reg" << Reg); + DEBUG(dbgs() << ' ' << PrintReg(Reg)); } } @@ -685,7 +689,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { unsigned DefIdx = 0; if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; @@ -731,6 +735,27 @@ void RAFast::handleThroughOperands(MachineInstr *MI, void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); + // FIXME: This should probably be added by instruction selection instead? + // If the last instruction in the block is a return, make sure to mark it as + // using all of the live-out values in the function. 
Things marked both call + // and return are tail calls; do not do this for them. The tail callee need + // not take the same registers as input that it produces as output, and there + // are dependencies for its input registers elsewhere. + if (!MBB->empty() && MBB->back().getDesc().isReturn() && + !MBB->back().getDesc().isCall()) { + MachineInstr *Ret = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + assert(TargetRegisterInfo::isPhysicalRegister(*I) && + "Cannot have a live-out virtual register."); + + // Add live-out registers as implicit uses. + Ret->addRegisterKilled(*I, TRI, true); + } + } + PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); @@ -761,7 +786,7 @@ void RAFast::AllocateBasicBlock() { dbgs() << "*"; break; default: - dbgs() << "=%reg" << PhysRegState[Reg]; + dbgs() << '=' << PrintReg(PhysRegState[Reg]); if (LiveVirtRegs[PhysRegState[Reg]].Dirty) dbgs() << "*"; assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && @@ -791,16 +816,18 @@ void RAFast::AllocateBasicBlock() { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; LiveDbgValueMap[Reg] = MI; LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); else { int SS = StackSlotForVirtReg[Reg]; - if (SS == -1) + if (SS == -1) { // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); + } else { // Modify DBG_VALUE now that the value is in a spill slot. int64_t Offset = MI->getOperand(1).getImm(); @@ -817,9 +844,11 @@ void RAFast::AllocateBasicBlock() { MI = NewDV; ScanDbgValue = true; break; - } else + } else { // We can't allocate a physreg for a DebugValue; sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); MO.setReg(0); + } } } } @@ -902,7 +931,7 @@ void RAFast::AllocateBasicBlock() { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); unsigned PhysReg = LRI->second.PhysReg; @@ -1017,8 +1046,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers - unsigned LastVirtReg = MRI->getLastVirtReg(); - StackSlotForVirtReg.grow(LastVirtReg); + StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp new file mode 100644 index 000000000000..c1372cd038cf --- /dev/null +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -0,0 +1,1285 @@ +//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "AllocationOrder.h" +#include "LiveIntervalUnion.h" +#include "LiveRangeEdit.h" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "SpillPlacement.h" +#include "SplitKit.h" +#include "VirtRegMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Function.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineLoopRanges.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +STATISTIC(NumGlobalSplits, "Number of split global live ranges"); +STATISTIC(NumLocalSplits, "Number of split local live ranges"); +STATISTIC(NumReassigned, "Number of interferences reassigned"); +STATISTIC(NumEvicted, "Number of interferences evicted"); + +static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", + createGreedyRegisterAllocator); + +namespace { +class RAGreedy : public MachineFunctionPass, public RegAllocBase { + // context + MachineFunction *MF; + BitVector ReservedRegs; + + // analyses + SlotIndexes *Indexes; + LiveStacks *LS; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineLoopRanges *LoopRanges; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + + // state + std::auto_ptr<Spiller> SpillerInstance; + std::auto_ptr<SplitAnalysis> SA; + + // splitting state. + + /// All basic blocks where the current register is live. + SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints; + + /// For every instruction in SA->UseSlots, store the previous non-copy + /// instruction. + SmallVector<SlotIndex, 8> PrevSlot; + +public: + RAGreedy(); + + /// Return the pass name. + virtual const char* getPassName() const { + return "Greedy Register Allocator"; + } + + /// RAGreedy analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + virtual Spiller &spiller() { return *SpillerInstance; } + + virtual float getPriority(LiveInterval *LI); + + virtual unsigned selectOrSplit(LiveInterval&, + SmallVectorImpl<LiveInterval*>&); + + /// Perform register allocation. 
+ virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; + +private: + bool checkUncachedInterference(LiveInterval&, unsigned); + LiveInterval *getSingleInterference(LiveInterval&, unsigned); + bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg); + float calcInterferenceWeight(LiveInterval&, unsigned); + float calcInterferenceInfo(LiveInterval&, unsigned); + float calcGlobalSplitCost(const BitVector&); + void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + SmallVectorImpl<LiveInterval*>&); + void calcGapWeights(unsigned, SmallVectorImpl<float>&); + SlotIndex getPrevMappedIndex(const MachineInstr*); + void calcPrevSlots(); + unsigned nextSplitPoint(unsigned); + + unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned trySplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); + unsigned trySpillInterferences(LiveInterval&, AllocationOrder&, + SmallVectorImpl<LiveInterval*>&); +}; +} // end anonymous namespace + +char RAGreedy::ID = 0; + +FunctionPass* llvm::createGreedyRegisterAllocator() { + return new RAGreedy(); +} + +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); + initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); +} + +void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineLoopRanges>(); + AU.addPreserved<MachineLoopRanges>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<EdgeBundles>(); + AU.addRequired<SpillPlacement>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RAGreedy::releaseMemory() { + SpillerInstance.reset(0); + RegAllocBase::releaseMemory(); +} + +float RAGreedy::getPriority(LiveInterval *LI) { + float Priority = LI->weight; + + // Prioritize hinted registers so they are allocated first. 
+ std::pair<unsigned, unsigned> Hint; + if (Hint.first || Hint.second) { + // The hint can be target specific, a virtual register, or a physreg. + Priority *= 2; + + // Prefer physreg hints above anything else. + if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second)) + Priority *= 2; + } + return Priority; +} + + +//===----------------------------------------------------------------------===// +// Register Reassignment +//===----------------------------------------------------------------------===// + +// Check interference without using the cache. +bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]); + if (subQ.checkInterference()) + return true; + } + return false; +} + +/// getSingleInterference - Return the single interfering virtual register +/// assigned to PhysReg. Return 0 if more than one virtual register is +/// interfering. +LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check physreg and aliases. + LiveInterval *Interference = 0; + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + if (Q.checkInterference()) { + if (Interference) + return 0; + Q.collectInterferingVRegs(1); + if (!Q.seenAllInterferences()) + return 0; + Interference = Q.interferingVRegs().front(); + } + } + return Interference; +} + +// Attempt to reassign this virtual register to a different physical register. +// +// FIXME: we are not yet caching these "second-level" interferences discovered +// in the sub-queries. These interferences can change with each call to +// selectOrSplit. However, we could implement a "may-interfere" cache that +// could be conservatively dirtied when we reassign or split. +// +// FIXME: This may result in a lot of alias queries. We could summarize alias +// live intervals in their parent register's live union, but it's messy. +bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, + unsigned WantedPhysReg) { + assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) && + "Can only reassign virtual registers"); + assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) && + "inconsistent phys reg assigment"); + + AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs); + while (unsigned PhysReg = Order.next()) { + // Don't reassign to a WantedPhysReg alias. + if (TRI->regsOverlap(PhysReg, WantedPhysReg)) + continue; + + if (checkUncachedInterference(InterferingVReg, PhysReg)) + continue; + + // Reassign the interfering virtual reg to this physical reg. + unsigned OldAssign = VRM->getPhys(InterferingVReg.reg); + DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " << + TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n'); + unassign(InterferingVReg, OldAssign); + assign(InterferingVReg, PhysReg); + ++NumReassigned; + return true; + } + return false; +} + +/// tryReassignOrEvict - Try to reassign a single interferences to a different +/// physreg, or evict a single interference with a lower spill weight. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. 
+unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs){ + NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); + + // Keep track of the lightest single interference seen so far. + float BestWeight = VirtReg.weight; + LiveInterval *BestVirt = 0; + unsigned BestPhys = 0; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); + if (!InterferingVReg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg)) + continue; + if (reassignVReg(*InterferingVReg, PhysReg)) + return PhysReg; + + // Cannot reassign, is this an eviction candidate? + if (InterferingVReg->weight < BestWeight) { + BestVirt = InterferingVReg; + BestPhys = PhysReg; + BestWeight = InterferingVReg->weight; + } + } + + // Nothing reassigned, can we evict a lighter single interference? + if (BestVirt) { + DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n'); + unassign(*BestVirt, VRM->getPhys(BestVirt->reg)); + ++NumEvicted; + NewVRegs.push_back(BestVirt); + return BestPhys; + } + + return 0; +} + + +//===----------------------------------------------------------------------===// +// Region Splitting +//===----------------------------------------------------------------------===// + +/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints +/// when considering interference from PhysReg. Also compute an optimistic local +/// cost of this interference pattern. +/// +/// The final cost of a split is the local cost + global cost of preferences +/// broken by SpillPlacement. +/// +float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { + // Reset interference dependent info. + SpillConstraints.resize(SA->LiveBlocks.size()); + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + BC.Number = BI.MBB->getNumber(); + BC.Entry = (BI.Uses && BI.LiveIn) ? + SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = (BI.Uses && BI.LiveOut) ? + SpillPlacement::PrefReg : SpillPlacement::DontCare; + BI.OverlapEntry = BI.OverlapExit = false; + } + + // Add interference info from each PhysReg alias. + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(VirtReg, *AI).checkInterference()) + continue; + LiveIntervalUnion::SegmentIter IntI = + PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); + if (!IntI.valid()) + continue; + + // Determine which blocks have interference live in or after the last split + // point. + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // Is the interference live-in? + if (BI.LiveIn) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() <= Start) + BC.Entry = SpillPlacement::MustSpill; + } + + // Is the interference overlapping the last split point? + if (BI.LiveOut) { + if (IntI.stop() < BI.LastSplitPoint) + IntI.advanceTo(BI.LastSplitPoint.getPrevSlot()); + if (!IntI.valid()) + break; + if (IntI.start() < Stop) + BC.Exit = SpillPlacement::MustSpill; + } + } + + // Rewind iterator and check other interferences. 
+ IntI.find(VirtReg.beginIndex()); + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // Handle transparent blocks with interference separately. + // Transparent blocks never incur any fixed cost. + if (BI.LiveThrough && !BI.Uses) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() >= Stop) + continue; + + if (BC.Entry != SpillPlacement::MustSpill) + BC.Entry = SpillPlacement::PrefSpill; + if (BC.Exit != SpillPlacement::MustSpill) + BC.Exit = SpillPlacement::PrefSpill; + continue; + } + + // Now we only have blocks with uses left. + // Check if the interference overlaps the uses. + assert(BI.Uses && "Non-transparent block without any uses"); + + // Check interference on entry. + if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + // Not live in, but before the first use. + if (IntI.start() < BI.FirstUse) + BC.Entry = SpillPlacement::PrefSpill; + } + + // Does interference overlap the uses in the entry segment + // [FirstUse;Kill)? + if (BI.LiveIn && !BI.OverlapEntry) { + IntI.advanceTo(BI.FirstUse); + if (!IntI.valid()) + break; + // A live-through interval has no kill. + // Check [FirstUse;LastUse) instead. + if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + BI.OverlapEntry = true; + } + + // Does interference overlap the uses in the exit segment [Def;LastUse)? + if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) { + IntI.advanceTo(BI.Def); + if (!IntI.valid()) + break; + if (IntI.start() < BI.LastUse) + BI.OverlapExit = true; + } + + // Check interference on exit. + if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) { + // Check interference between LastUse and Stop. + if (BC.Exit != SpillPlacement::PrefSpill) { + IntI.advanceTo(BI.LastUse); + if (!IntI.valid()) + break; + if (IntI.start() < Stop) + BC.Exit = SpillPlacement::PrefSpill; + } + } + } + } + + // Accumulate a local cost of this interference pattern. + float LocalCost = 0; + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + if (!BI.Uses) + continue; + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + unsigned Inserts = 0; + + // Do we need spill code for the entry segment? + if (BI.LiveIn) + Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg; + + // For the exit segment? + if (BI.LiveOut) + Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg; + + // The local cost of spill code in this block is the block frequency times + // the number of spill instructions inserted. + if (Inserts) + LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB); + } + DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = " + << LocalCost << '\n'); + return LocalCost; +} + +/// calcGlobalSplitCost - Return the global split cost of following the split +/// pattern in LiveBundles. This cost should be added to the local cost of the +/// interference pattern in SpillConstraints. 
+/// +float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { + float GlobalCost = 0; + for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) { + SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + unsigned Inserts = 0; + // Broken entry preference? + Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] != + (BC.Entry == SpillPlacement::PrefReg); + // Broken exit preference? + Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] != + (BC.Exit == SpillPlacement::PrefReg); + if (Inserts) + GlobalCost += + Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB); + } + DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n'); + return GlobalCost; +} + +/// splitAroundRegion - Split VirtReg around the region determined by +/// LiveBundles. Make an effort to avoid interference from PhysReg. +/// +/// The 'register' interval is going to contain as many uses as possible while +/// avoiding interference. The 'stack' interval is the complement constructed by +/// SplitEditor. It will contain the rest. +/// +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, + const BitVector &LiveBundles, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + DEBUG({ + dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + << " with bundles"; + for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + + // First compute interference ranges in the live blocks. + typedef std::pair<SlotIndex, SlotIndex> IndexPair; + SmallVector<IndexPair, 8> InterferenceRanges; + InterferenceRanges.resize(SA->LiveBlocks.size()); + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(VirtReg, *AI).checkInterference()) + continue; + LiveIntervalUnion::SegmentIter IntI = + PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); + if (!IntI.valid()) + continue; + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + IndexPair &IP = InterferenceRanges[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Skip interference-free blocks. + if (IntI.start() >= Stop) + continue; + + // First interference in block. + if (BI.LiveIn) { + IntI.advanceTo(Start); + if (!IntI.valid()) + break; + if (IntI.start() >= Stop) + continue; + if (!IP.first.isValid() || IntI.start() < IP.first) + IP.first = IntI.start(); + } + + // Last interference in block. + if (BI.LiveOut) { + IntI.advanceTo(Stop); + if (!IntI.valid() || IntI.start() >= Stop) + --IntI; + if (IntI.stop() <= Start) + continue; + if (!IP.second.isValid() || IntI.stop() > IP.second) + IP.second = IntI.stop(); + } + } + } + + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + + // Create the main cross-block interval. + SE.openIntv(); + + // First add all defs that are live out of a block. + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + + // Should the register be live out? 
+    if (!BI.LiveOut || !RegOut)
+      continue;
+
+    IndexPair &IP = InterferenceRanges[i];
+    SlotIndex Start, Stop;
+    tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
+                 << " intf [" << IP.first << ';' << IP.second << ')');
+
+    // The interference interval should either be invalid or overlap MBB.
+    assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
+    assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+
+    // Check interference leaving the block.
+    if (!IP.second.isValid()) {
+      // Block is interference-free.
+      DEBUG(dbgs() << ", no interference");
+      if (!BI.Uses) {
+        assert(BI.LiveThrough && "No uses, but not live through block?");
+        // Block is live-through without interference.
+        DEBUG(dbgs() << ", no uses"
+                     << (RegIn ? ", live-through.\n" : ", stack in.\n"));
+        if (!RegIn)
+          SE.enterIntvAtEnd(*BI.MBB);
+        continue;
+      }
+      if (!BI.LiveThrough) {
+        DEBUG(dbgs() << ", not live-through.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+        continue;
+      }
+      if (!RegIn) {
+        // Block is live-through, but entry bundle is on the stack.
+        // Reload just before the first use.
+        DEBUG(dbgs() << ", not live-in, enter before first use.\n");
+        SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+        continue;
+      }
+      DEBUG(dbgs() << ", live-through.\n");
+      continue;
+    }
+
+    // Block has interference.
+    DEBUG(dbgs() << ", interference to " << IP.second);
+
+    if (!BI.LiveThrough && IP.second <= BI.Def) {
+      // The interference doesn't reach the outgoing segment.
+      DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
+      SE.useIntv(BI.Def, Stop);
+      continue;
+    }
+
+
+    if (!BI.Uses) {
+      // No uses in block, avoid interference by reloading as late as possible.
+      DEBUG(dbgs() << ", no uses.\n");
+      SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+      assert(SegStart >= IP.second && "Couldn't avoid interference");
+      continue;
+    }
+
+    if (IP.second.getBoundaryIndex() < BI.LastUse) {
+      // There are interference-free uses at the end of the block.
+      // Find the first use that can get the live-out register.
+      SmallVectorImpl<SlotIndex>::const_iterator UI =
+        std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+                         IP.second.getBoundaryIndex());
+      assert(UI != SA->UseSlots.end() && "Couldn't find last use");
+      SlotIndex Use = *UI;
+      assert(Use <= BI.LastUse && "Couldn't find last use");
+      // Only attempt a split before the last split point.
+      if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+        DEBUG(dbgs() << ", free use at " << Use << ".\n");
+        SlotIndex SegStart = SE.enterIntvBefore(Use);
+        assert(SegStart >= IP.second && "Couldn't avoid interference");
+        assert(SegStart < BI.LastSplitPoint && "Impossible split point");
+        SE.useIntv(SegStart, Stop);
+        continue;
+      }
+    }
+
+    // Interference is after the last use.
+    DEBUG(dbgs() << " after last use.\n");
+    SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+    assert(SegStart >= IP.second && "Couldn't avoid interference");
+  }
+
+  // Now all defs leading to live bundles are handled, do everything else.
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+    // Is the register live-in?
+    if (!BI.LiveIn || !RegIn)
+      continue;
+
+    // We have an incoming register. Check for interference.
+ IndexPair &IP = InterferenceRanges[i]; + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) + << " -> BB#" << BI.MBB->getNumber()); + + // Check interference entering the block. + if (!IP.first.isValid()) { + // Block is interference-free. + DEBUG(dbgs() << ", no interference"); + if (!BI.Uses) { + assert(BI.LiveThrough && "No uses, but not live through block?"); + // Block is live-through without interference. + if (RegOut) { + DEBUG(dbgs() << ", no uses, live-through.\n"); + SE.useIntv(Start, Stop); + } else { + DEBUG(dbgs() << ", no uses, stack-out.\n"); + SE.leaveIntvAtTop(*BI.MBB); + } + continue; + } + if (!BI.LiveThrough) { + DEBUG(dbgs() << ", killed in block.\n"); + SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill)); + continue; + } + if (!RegOut) { + // Block is live-through, but exit bundle is on the stack. + // Spill immediately after the last use. + if (BI.LastUse < BI.LastSplitPoint) { + DEBUG(dbgs() << ", uses, stack-out.\n"); + SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); + continue; + } + // The last use is after the last split point, it is probably an + // indirect jump. + DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " + << BI.LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint); + SE.useIntv(Start, SegEnd); + // Run a double interval from the split to the last use. + // This makes it possible to spill the complement without affecting the + // indirect branch. + SE.overlapIntv(SegEnd, BI.LastUse); + continue; + } + // Register is live-through. + DEBUG(dbgs() << ", uses, live-through.\n"); + SE.useIntv(Start, Stop); + continue; + } + + // Block has interference. + DEBUG(dbgs() << ", interference from " << IP.first); + + if (!BI.LiveThrough && IP.first >= BI.Kill) { + // The interference doesn't reach the outgoing segment. + DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); + SE.useIntv(Start, BI.Kill); + continue; + } + + if (!BI.Uses) { + // No uses in block, avoid interference by spilling as soon as possible. + DEBUG(dbgs() << ", no uses.\n"); + SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + continue; + } + if (IP.first.getBaseIndex() > BI.FirstUse) { + // There are interference-free uses at the beginning of the block. + // Find the last use that can get the register. + SmallVectorImpl<SlotIndex>::const_iterator UI = + std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), + IP.first.getBaseIndex()); + assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); + SlotIndex Use = (--UI)->getBoundaryIndex(); + DEBUG(dbgs() << ", free use at " << *UI << ".\n"); + SlotIndex SegEnd = SE.leaveIntvAfter(Use); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SE.useIntv(Start, SegEnd); + continue; + } + + // Interference is before the first use. + DEBUG(dbgs() << " before first use.\n"); + SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + } + + SE.closeIntv(); + + // FIXME: Should we be more aggressive about splitting the stack region into + // per-block segments? The current approach allows the stack region to + // separate into connected components. Some components may be allocatable. 
+ SE.finish(); + ++NumGlobalSplits; + + if (VerifyEnabled) { + MF->verify(this, "After splitting live range around region"); + +#ifndef NDEBUG + // Make sure that at least one of the new intervals can allocate to PhysReg. + // That was the whole point of splitting the live range. + bool found = false; + for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; + ++I) + if (!checkUncachedInterference(**I, PhysReg)) { + found = true; + break; + } + assert(found && "No allocatable intervals after pointless splitting"); +#endif + } +} + +unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + BitVector LiveBundles, BestBundles; + float BestCost = 0; + unsigned BestReg = 0; + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Cost = calcInterferenceInfo(VirtReg, PhysReg); + if (BestReg && Cost >= BestCost) + continue; + + SpillPlacer->placeSpills(SpillConstraints, LiveBundles); + // No live bundles, defer to splitSingleBlocks(). + if (!LiveBundles.any()) + continue; + + Cost += calcGlobalSplitCost(LiveBundles); + if (!BestReg || Cost < BestCost) { + BestReg = PhysReg; + BestCost = Cost; + BestBundles.swap(LiveBundles); + } + } + + if (!BestReg) + return 0; + + splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Local Splitting +//===----------------------------------------------------------------------===// + + +/// calcGapWeights - Compute the maximum spill weight that needs to be evicted +/// in order to use PhysReg between two entries in SA->UseSlots. +/// +/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// +void RAGreedy::calcGapWeights(unsigned PhysReg, + SmallVectorImpl<float> &GapWeight) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + const unsigned NumGaps = Uses.size()-1; + + // Start and end points for the interference check. + SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; + SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + + GapWeight.assign(NumGaps, 0.0f); + + // Add interference from each overlapping register. + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI) + .checkInterference()) + continue; + + // We know that VirtReg is a continuous interval from FirstUse to LastUse, + // so we don't need InterferenceQuery. + // + // Interference that overlaps an instruction is counted in both gaps + // surrounding the instruction. The exception is interference before + // StartIdx and after StopIdx. + // + LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { + // Skip the gaps before IntI. + while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + // Update the gaps covered by IntI. 
+ const float weight = IntI.value()->weight; + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = std::max(GapWeight[Gap], weight); + if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) + break; + } + if (Gap == NumGaps) + break; + } + } +} + +/// getPrevMappedIndex - Return the slot index of the last non-copy instruction +/// before MI that has a slot index. If MI is the first mapped instruction in +/// its block, return the block start index instead. +/// +SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { + assert(MI && "Missing MachineInstr"); + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; + while (I != B) + if (!(--I)->isDebugValue() && !I->isCopy()) + return Indexes->getInstructionIndex(I); + return Indexes->getMBBStartIdx(MBB); +} + +/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous +/// real non-copy instruction for each instruction in SA->UseSlots. +/// +void RAGreedy::calcPrevSlots() { + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + PrevSlot.clear(); + PrevSlot.reserve(Uses.size()); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); + PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); + } +} + +/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may +/// be beneficial to split before UseSlots[i]. +/// +/// 0 is always a valid split point +unsigned RAGreedy::nextSplitPoint(unsigned i) { + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + const unsigned Size = Uses.size(); + assert(i != Size && "No split points after the end"); + // Allow split before i when Uses[i] is not adjacent to the previous use. + while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) + ; + return i; +} + +/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only +/// basic block. +/// +unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + + // Note that it is possible to have an interval that is live-in or live-out + // while only covering a single block - A phi-def can use undef values from + // predecessors, and the block could be a single-block loop. + // We don't bother doing anything clever about such a case, we simply assume + // that the interval is continuous from FirstUse to LastUse. We should make + // sure that we don't do anything illegal to such an interval, though. + + const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots; + if (Uses.size() <= 2) + return 0; + const unsigned NumGaps = Uses.size()-1; + + DEBUG({ + dbgs() << "tryLocalSplit: "; + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << '\n'; + }); + + // For every use, find the previous mapped non-copy instruction. + // We use this to detect valid split points, and to estimate new interval + // sizes. + calcPrevSlots(); + + unsigned BestBefore = NumGaps; + unsigned BestAfter = 0; + float BestDiff = 0; + + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB); + SmallVector<float, 8> GapWeight; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + // Keep track of the largest spill weight that would need to be evicted in + // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. 
+ calcGapWeights(PhysReg, GapWeight); + + // Try to find the best sequence of gaps to close. + // The new spill weight must be larger than any gap interference. + + // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. + unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + + // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). + // It is the spill weight that needs to be evicted. + float MaxGap = GapWeight[0]; + for (unsigned i = 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + + for (;;) { + // Live before/after split? + const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; + const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + << Uses[SplitBefore] << '-' << Uses[SplitAfter] + << " i=" << MaxGap); + + // Stop before the interval gets so big we wouldn't be making progress. + if (!LiveBefore && !LiveAfter) { + DEBUG(dbgs() << " all\n"); + break; + } + // Should the interval be extended or shrunk? + bool Shrink = true; + if (MaxGap < HUGE_VALF) { + // Estimate the new spill weight. + // + // Each instruction reads and writes the register, except the first + // instr doesn't read when !FirstLive, and the last instr doesn't write + // when !LastLive. + // + // We will be inserting copies before and after, so the total number of + // reads and writes is 2 * EstUses. + // + const unsigned EstUses = 2*(SplitAfter - SplitBefore) + + 2*(LiveBefore + LiveAfter); + + // Try to guess the size of the new interval. This should be trivial, + // but the slot index of an inserted copy can be a lot smaller than the + // instruction it is inserted before if there are many dead indexes + // between them. + // + // We measure the distance from the instruction before SplitBefore to + // get a conservative estimate. + // + // The final distance can still be different if inserting copies + // triggers a slot index renumbering. + // + const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, + PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Would this split be possible to allocate? + // Never allocate all gaps, we wouldn't be making progress. + float Diff = EstWeight - MaxGap; + DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); + if (Diff > 0) { + Shrink = false; + if (Diff > BestDiff) { + DEBUG(dbgs() << " (best)"); + BestDiff = Diff; + BestBefore = SplitBefore; + BestAfter = SplitAfter; + } + } + } + + // Try to shrink. + if (Shrink) { + SplitBefore = nextSplitPoint(SplitBefore); + if (SplitBefore < SplitAfter) { + DEBUG(dbgs() << " shrink\n"); + // Recompute the max when necessary. + if (GapWeight[SplitBefore - 1] >= MaxGap) { + MaxGap = GapWeight[SplitBefore]; + for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + } + continue; + } + MaxGap = 0; + } + + // Try to extend the interval. + if (SplitAfter >= NumGaps) { + DEBUG(dbgs() << " end\n"); + break; + } + + DEBUG(dbgs() << " extend\n"); + for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; + SplitAfter != e; ++SplitAfter) + MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); + continue; + } + } + + // Didn't find any candidates? 
+ if (BestBefore == NumGaps) + return 0; + + DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] + << '-' << Uses[BestAfter] << ", " << BestDiff + << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + + SE.openIntv(); + SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]); + SE.useIntv(SegStart, SegStop); + SE.closeIntv(); + SE.finish(); + ++NumLocalSplits; + + return 0; +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +/// trySplit - Try to split VirtReg or one of its interferences, making it +/// assignable. +/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. +unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<LiveInterval*>&NewVRegs) { + SA->analyze(&VirtReg); + + // Local intervals are handled separately. + if (LIS->intervalIsInOneMBB(VirtReg)) { + NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + return tryLocalSplit(VirtReg, Order, NewVRegs); + } + + NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + + // First try to split around a region spanning multiple blocks. + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Then isolate blocks with multiple uses. + SplitAnalysis::BlockPtrSet Blocks; + if (SA->getMultiUseBlocks(Blocks)) { + SmallVector<LiveInterval*, 4> SpillRegs; + LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); + SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks); + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + } + + // Don't assign any physregs. + return 0; +} + + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// calcInterferenceWeight - Calculate the combined spill weight of +/// interferences when assigning VirtReg to PhysReg. +float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){ + float Sum = 0; + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) + return HUGE_VALF; + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) + Sum += Q.interferingVRegs()[i]->weight; + } + return Sum; +} + +/// trySpillInterferences - Try to spill interfering registers instead of the +/// current one. Only do it if the accumulated spill weight is smaller than the +/// current spill weight. +unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled); + unsigned BestPhys = 0; + float BestWeight = 0; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Weight = calcInterferenceWeight(VirtReg, PhysReg); + if (Weight == HUGE_VALF || Weight >= VirtReg.weight) + continue; + if (!BestPhys || Weight < BestWeight) + BestPhys = PhysReg, BestWeight = Weight; + } + + // No candidates found. 
+ if (!BestPhys) + return 0; + + // Collect all interfering registers. + SmallVector<LiveInterval*, 8> Spills; + for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AI); + Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end()); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *VReg = Q.interferingVRegs()[i]; + unassign(*VReg, *AI); + } + } + + // Spill them all. + DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight " + << BestWeight << '\n'); + for (unsigned i = 0, e = Spills.size(); i != e; ++i) + spiller().spill(Spills[i], NewVRegs, Spills); + return BestPhys; +} + + +//===----------------------------------------------------------------------===// +// Main Entry Point +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<LiveInterval*> &NewVRegs) { + // First try assigning a free register. + AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); + while (unsigned PhysReg = Order.next()) { + if (!checkPhysRegInterference(VirtReg, PhysReg)) + return PhysReg; + } + + // Try to reassign interferences. + if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + + // Try splitting VirtReg or interferences. + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Try to spill another interfering reg with less spill weight. + PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs); + if (PhysReg) + return PhysReg; + + // Finally spill VirtReg itself. + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + SmallVector<LiveInterval*, 1> pendingSpills; + spiller().spill(&VirtReg, NewVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + if (VerifyEnabled) + MF->verify(this, "Before greedy register allocator"); + + RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>()); + Indexes = &getAnalysis<SlotIndexes>(); + DomTree = &getAnalysis<MachineDominatorTree>(); + ReservedRegs = TRI->getReservedRegs(*MF); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + Loops = &getAnalysis<MachineLoopInfo>(); + LoopRanges = &getAnalysis<MachineLoopRanges>(); + Bundles = &getAnalysis<EdgeBundles>(); + SpillPlacer = &getAnalysis<SpillPlacement>(); + + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + + allocatePhysRegs(); + addMBBLiveIns(MF); + LIS->addKillFlags(); + + // Run rewriter + { + NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); + VRM->rewrite(Indexes); + } + + // The pass output is in VirtRegMap. Release all the transient data. 
+ releaseMemory(); + + return true; +} diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 5c62354a8872..b959878bcdba 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -12,13 +12,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "LiveDebugVariables.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Function.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -91,6 +92,19 @@ namespace { struct RALinScan : public MachineFunctionPass { static char ID; RALinScan() : MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup( + *PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializePreAllocSplittingPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + // Initialize the queue to record recently-used registers. if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); @@ -127,7 +141,6 @@ namespace { BitVector allocatableRegs_; BitVector reservedRegs_; LiveIntervals* li_; - LiveStacks* ls_; MachineLoopInfo *loopInfo; /// handled_ - Intervals are added to the handled_ set in the order of their @@ -183,6 +196,8 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); + AU.addPreserved<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<SlotIndexes>(); if (StrongPHIElim) @@ -193,12 +208,15 @@ namespace { AU.addRequired<CalculateSpillWeights>(); if (PreSplitIntervals) AU.addRequiredID(PreAllocSplittingID); - AU.addRequired<LiveStacks>(); - AU.addPreserved<LiveStacks>(); + AU.addRequiredID(LiveStacksID); + AU.addPreservedID(LiveStacksID); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<VirtRegMap>(); AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -370,8 +388,19 @@ namespace { char RALinScan::ID = 0; } -INITIALIZE_PASS(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false); +INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) +INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 
+INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -402,8 +431,12 @@ void RALinScan::ComputeRelatedRegClasses() { for (DenseMap<unsigned, const TargetRegisterClass*>::iterator I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) - RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); + for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { + const TargetRegisterClass *AliasClass = + OneClassForEachPhysReg.lookup(*AS); + if (AliasClass) + RelatedRegClasses.unionSets(I->second, AliasClass); + } } /// attemptTrivialCoalescing - If a simple interval is defined by a copy, try @@ -431,8 +464,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned CandReg; { MachineInstr *CopyMI; - if (vni->def != SlotIndex() && vni->isDefAccurate() && - (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) + if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) // Defined by a copy, try to extend SrcReg forward CandReg = CopyMI->getOperand(1).getReg(); else if (TrivCoalesceEnds && @@ -442,6 +474,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { CandReg = CopyMI->getOperand(0).getReg(); else return Reg; + + // If the target of the copy is a sub-register then don't coalesce. + if(CopyMI->getOperand(0).getSubReg()) + return Reg; } if (TargetRegisterInfo::isVirtualRegister(CandReg)) { @@ -478,7 +514,6 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { allocatableRegs_ = tri_->getAllocatableSet(fn); reservedRegs_ = tri_->getReservedRegs(fn); li_ = &getAnalysis<LiveIntervals>(); - ls_ = &getAnalysis<LiveStacks>(); loopInfo = &getAnalysis<MachineLoopInfo>(); // We don't run the coalescer here because we have no reason to @@ -505,6 +540,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { // Rewrite spill code and update the PhysRegsUsed set. rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_); + assert(unhandled_.empty() && "Unhandled live intervals remain!"); finalizeRegUses(); @@ -638,8 +676,6 @@ void RALinScan::linearScan() { // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. - if (ls_->getNumIntervals() == 0) - return; if (!vrm_->FindUnusedRegisters(li_)) return; } @@ -784,30 +820,6 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, } } -/// addStackInterval - Create a LiveInterval for stack if the specified live -/// interval has been spilled. 
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, - LiveIntervals *li_, - MachineRegisterInfo* mri_, VirtRegMap &vrm_) { - int SS = vrm_.getStackSlot(cur->reg); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - LiveInterval &SI = ls_->getOrCreateInterval(SS, RC); - - VNInfo *VNI; - if (SI.hasAtLeastOneValue()) - VNI = SI.getValNumInfo(0); - else - VNI = SI.getNextValue(SlotIndex(), 0, false, - ls_->getVNInfoAllocator()); - - LiveInterval &RI = li_->getInterval(cur->reg); - // FIXME: This may be overly conservative. - SI.MergeRangesInAsValue(RI, VNI); -} - /// getConflictWeight - Return the number of conflicts between cur /// live interval and defs and uses of Reg weighted by loop depthes. static @@ -925,13 +937,9 @@ LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { } void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - bool isNew = DowngradedRegs.insert(Reg); - isNew = isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, Reg)); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) { - isNew = DowngradedRegs.insert(*AS); - isNew = isNew; // Silence compiler warning. + for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { + bool isNew = DowngradedRegs.insert(*AS); + (void)isNew; // Silence compiler warning. assert(isNew && "Multiple reloads holding the same register?"); DowngradeMap.insert(std::make_pair(li->reg, *AS)); } @@ -957,10 +965,11 @@ namespace { /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - DEBUG(dbgs() << "\tallocating current interval: "); + const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); + DEBUG(dbgs() << "\tallocating current interval from " + << RC->getName() << ": "); // This is an implicitly defined live interval, just assign any register. - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); if (cur->empty()) { unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) @@ -984,8 +993,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if ((vni->def != SlotIndex()) && !vni->isUnused() && - vni->isDefAccurate()) { + if (!vni->isUnused()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); if (CopyMI && CopyMI->isCopy()) { unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); @@ -1225,7 +1233,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); - addStackInterval(cur, ls_, li_, mri_, *vrm_); if (added.empty()) return; // Early exit if all spills were folded. @@ -1300,7 +1307,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); spiller_->spill(sli, added, spillIs); - addStackInterval(sli, ls_, li_, mri_, *vrm_); spilled.insert(sli->reg); } @@ -1419,8 +1425,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg); // Resolve second part of the hint (if possible) given the current allocation. 
unsigned physReg = Hint.second; - if (physReg && - TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) + if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) physReg = vrm_->getPhys(physReg); TargetRegisterClass::iterator I, E; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 61f337bab49c..ea0d1fe0233f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,9 +31,6 @@ #define DEBUG_TYPE "regalloc" -#include "PBQP/HeuristicSolver.h" -#include "PBQP/Graph.h" -#include "PBQP/Heuristics/Briggs.h" #include "RenderMachineFunction.h" #include "Splitter.h" #include "VirtRegMap.h" @@ -41,9 +38,13 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PBQP/HeuristicSolver.h" +#include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" @@ -51,7 +52,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include <limits> -#include <map> #include <memory> #include <set> #include <vector> @@ -60,7 +60,7 @@ using namespace llvm; static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator", - llvm::createPBQPRegisterAllocator); + createDefaultPBQPRegisterAllocator); static cl::opt<bool> pbqpCoalescing("pbqp-coalescing", @@ -69,698 +69,471 @@ pbqpCoalescing("pbqp-coalescing", static cl::opt<bool> pbqpPreSplitting("pbqp-pre-splitting", - cl::desc("Pre-splite before PBQP register allocation."), + cl::desc("Pre-split before PBQP register allocation."), cl::init(false), cl::Hidden); namespace { - /// - /// PBQP based allocators solve the register allocation problem by mapping - /// register allocation problems to Partitioned Boolean Quadratic - /// Programming problems. - class PBQPRegAlloc : public MachineFunctionPass { - public: +/// +/// PBQP based allocators solve the register allocation problem by mapping +/// register allocation problems to Partitioned Boolean Quadratic +/// Programming problems. +class RegAllocPBQP : public MachineFunctionPass { +public: + + static char ID; + + /// Construct a PBQP register allocator. + RegAllocPBQP(std::auto_ptr<PBQPBuilder> b) + : MachineFunctionPass(ID), builder(b) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + } - static char ID; + /// Return the pass name. + virtual const char* getPassName() const { + return "PBQP Register Allocator"; + } - /// Construct a PBQP register allocator. - PBQPRegAlloc() : MachineFunctionPass(ID) {} + /// PBQP analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &au) const; - /// Return the pass name. 
- virtual const char* getPassName() const { - return "PBQP Register Allocator"; - } + /// Perform register allocation + virtual bool runOnMachineFunction(MachineFunction &MF); - /// PBQP analysis usage. - virtual void getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired<SlotIndexes>(); - au.addPreserved<SlotIndexes>(); - au.addRequired<LiveIntervals>(); - //au.addRequiredID(SplitCriticalEdgesID); - au.addRequired<RegisterCoalescer>(); - au.addRequired<CalculateSpillWeights>(); - au.addRequired<LiveStacks>(); - au.addPreserved<LiveStacks>(); - au.addRequired<MachineLoopInfo>(); - au.addPreserved<MachineLoopInfo>(); - if (pbqpPreSplitting) - au.addRequired<LoopSplitter>(); - au.addRequired<VirtRegMap>(); - au.addRequired<RenderMachineFunction>(); - MachineFunctionPass::getAnalysisUsage(au); - } +private: - /// Perform register allocation - virtual bool runOnMachineFunction(MachineFunction &MF); + typedef std::map<const LiveInterval*, unsigned> LI2NodeMap; + typedef std::vector<const LiveInterval*> Node2LIMap; + typedef std::vector<unsigned> AllowedSet; + typedef std::vector<AllowedSet> AllowedSetMap; + typedef std::pair<unsigned, unsigned> RegPair; + typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; + typedef std::vector<PBQP::Graph::NodeItr> NodeVector; + typedef std::set<unsigned> RegSet; - private: - class LIOrdering { - public: - bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { - return li1->reg < li2->reg; - } - }; - - typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap; - typedef std::vector<const LiveInterval*> Node2LIMap; - typedef std::vector<unsigned> AllowedSet; - typedef std::vector<AllowedSet> AllowedSetMap; - typedef std::set<unsigned> RegSet; - typedef std::pair<unsigned, unsigned> RegPair; - typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; - - typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet; - - typedef std::vector<PBQP::Graph::NodeItr> NodeVector; - - MachineFunction *mf; - const TargetMachine *tm; - const TargetRegisterInfo *tri; - const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; - MachineRegisterInfo *mri; - RenderMachineFunction *rmf; - - LiveIntervals *lis; - LiveStacks *lss; - VirtRegMap *vrm; - - LI2NodeMap li2Node; - Node2LIMap node2LI; - AllowedSetMap allowedSets; - LiveIntervalSet vregIntervalsToAlloc, - emptyVRegIntervals; - NodeVector problemNodes; - - - /// Builds a PBQP cost vector. - template <typename RegContainer> - PBQP::Vector buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &cealesces, - PBQP::PBQPNum spillCost) const; - - /// \brief Builds a PBQP interference matrix. - /// - /// @return Either a pointer to a non-zero PBQP matrix representing the - /// allocation option costs, or a null pointer for a zero matrix. - /// - /// Expects allowed sets for two interfering LiveIntervals. These allowed - /// sets should contain only allocable registers from the LiveInterval's - /// register class, with any interfering pre-colored registers removed. - template <typename RegContainer> - PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1, - const RegContainer &allowed2) const; - - /// - /// Expects allowed sets for two potentially coalescable LiveIntervals, - /// and an estimated benefit due to coalescing. The allowed sets should - /// contain only allocable registers from the LiveInterval's register - /// classes, with any interfering pre-colored registers removed. 
- template <typename RegContainer> - PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1, - const RegContainer &allowed2, - PBQP::PBQPNum cBenefit) const; - - /// \brief Finds coalescing opportunities and returns them as a map. - /// - /// Any entries in the map are guaranteed coalescable, even if their - /// corresponding live intervals overlap. - CoalesceMap findCoalesces(); - - /// \brief Finds the initial set of vreg intervals to allocate. - void findVRegIntervalsToAlloc(); - - /// \brief Constructs a PBQP problem representation of the register - /// allocation problem for this function. - /// - /// @return a PBQP solver object for the register allocation problem. - PBQP::Graph constructPBQPProblem(); - - /// \brief Adds a stack interval if the given live interval has been - /// spilled. Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - - /// \brief Given a solved PBQP problem maps this solution back to a register - /// assignment. - bool mapPBQPToRegAlloc(const PBQP::Solution &solution); - - /// \brief Postprocessing before final spilling. Sets basic block "live in" - /// variables. - void finalizeAlloc() const; - - }; - - char PBQPRegAlloc::ID = 0; -} + std::auto_ptr<PBQPBuilder> builder; + MachineFunction *mf; + const TargetMachine *tm; + const TargetRegisterInfo *tri; + const TargetInstrInfo *tii; + const MachineLoopInfo *loopInfo; + MachineRegisterInfo *mri; + RenderMachineFunction *rmf; -template <typename RegContainer> -PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &coalesces, - PBQP::PBQPNum spillCost) const { + LiveIntervals *lis; + LiveStacks *lss; + VirtRegMap *vrm; - typedef typename RegContainer::const_iterator AllowedItr; + RegSet vregsToAlloc, emptyIntervalVRegs; - // Allocate vector. Additional element (0th) used for spill option - PBQP::Vector v(allowed.size() + 1, 0); + /// \brief Finds the initial set of vreg intervals to allocate. + void findVRegIntervalsToAlloc(); - v[0] = spillCost; + /// \brief Adds a stack interval if the given live interval has been + /// spilled. Used to support stack slot coloring. + void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - // Iterate over the allowed registers inserting coalesce benefits if there - // are any. - unsigned ai = 0; - for (AllowedItr itr = allowed.begin(), end = allowed.end(); - itr != end; ++itr, ++ai) { + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution); - unsigned pReg = *itr; + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. + void finalizeAlloc() const; - CoalesceMap::const_iterator cmItr = - coalesces.find(RegPair(vReg, pReg)); +}; - // No coalesce - on to the next preg. - if (cmItr == coalesces.end()) - continue; +char RegAllocPBQP::ID = 0; - // We have a coalesce - insert the benefit. - v[ai + 1] = -cmItr->second; - } +} // End anonymous namespace. 
- return v; +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { + Node2VReg::const_iterator vregItr = node2VReg.find(node); + assert(vregItr != node2VReg.end() && "No vreg for node."); + return vregItr->second; } -template <typename RegContainer> -PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix( - const RegContainer &allowed1, const RegContainer &allowed2) const { - - typedef typename RegContainer::const_iterator RegContainerIterator; - - // Construct a PBQP matrix representing the cost of allocation options. The - // rows and columns correspond to the allocation options for the two live - // intervals. Elements will be infinite where corresponding registers alias, - // since we cannot allocate aliasing registers to interfering live intervals. - // All other elements (non-aliasing combinations) will have zero cost. Note - // that the spill option (element 0,0) has zero cost, since we can allocate - // both intervals to memory safely (the cost for each individual allocation - // to memory is accounted for by the cost vectors for each live interval). - PBQP::Matrix *m = - new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); - - // Assume this is a zero matrix until proven otherwise. Zero matrices occur - // between interfering live ranges with non-overlapping register sets (e.g. - // non-overlapping reg classes, or disjoint sets of allowed regs within the - // same class). The term "overlapping" is used advisedly: sets which do not - // intersect, but contain registers which alias, will have non-zero matrices. - // We optimize zero matrices away to improve solver speed. - bool isZeroMatrix = true; - - - // Row index. Starts at 1, since the 0th row is for the spill option, which - // is always zero. - unsigned ri = 1; - - // Iterate over allowed sets, insert infinities where required. - for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); - a1Itr != a1End; ++a1Itr) { - - // Column index, starts at 1 as for row index. - unsigned ci = 1; - unsigned reg1 = *a1Itr; - - for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); - a2Itr != a2End; ++a2Itr) { - - unsigned reg2 = *a2Itr; - - // If the row/column regs are identical or alias insert an infinity. - if (tri->regsOverlap(reg1, reg2)) { - (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity(); - isZeroMatrix = false; - } - - ++ci; - } - - ++ri; - } - - // If this turns out to be a zero matrix... - if (isZeroMatrix) { - // free it and return null. - delete m; - return 0; - } - - // ...otherwise return the cost matrix. - return m; +PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { + VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); + assert(nodeItr != vreg2Node.end() && "No node for vreg."); + return nodeItr->second; + } -template <typename RegContainer> -PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix( - const RegContainer &allowed1, const RegContainer &allowed2, - PBQP::PBQPNum cBenefit) const { - - typedef typename RegContainer::const_iterator RegContainerIterator; - - // Construct a PBQP Matrix representing the benefits of coalescing. As with - // interference matrices the rows and columns represent allowed registers - // for the LiveIntervals which are (potentially) to be coalesced. The amount - // -cBenefit will be placed in any element representing the same register - // for both intervals. - PBQP::Matrix *m = - new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); - - // Reset costs to zero. 
- m->reset(0); - - // Assume the matrix is zero till proven otherwise. Zero matrices will be - // optimized away as in the interference case. - bool isZeroMatrix = true; - - // Row index. Starts at 1, since the 0th row is for the spill option, which - // is always zero. - unsigned ri = 1; - - // Iterate over the allowed sets, insert coalescing benefits where - // appropriate. - for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end(); - a1Itr != a1End; ++a1Itr) { - - // Column index, starts at 1 as for row index. - unsigned ci = 1; - unsigned reg1 = *a1Itr; - - for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end(); - a2Itr != a2End; ++a2Itr) { - - // If the row and column represent the same register insert a beneficial - // cost to preference this allocation - it would allow us to eliminate a - // move instruction. - if (reg1 == *a2Itr) { - (*m)[ri][ci] = -cBenefit; - isZeroMatrix = false; - } - - ++ci; - } - - ++ri; - } - - // If this turns out to be a zero matrix... - if (isZeroMatrix) { - // ...free it and return null. - delete m; - return 0; - } - - return m; +const PBQPRAProblem::AllowedSet& + PBQPRAProblem::getAllowedSet(unsigned vreg) const { + AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg); + assert(allowedSetItr != allowedSets.end() && "No pregs for vreg."); + const AllowedSet &allowedSet = allowedSetItr->second; + return allowedSet; } -PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { - - typedef MachineFunction::const_iterator MFIterator; - typedef MachineBasicBlock::const_iterator MBBIterator; - typedef LiveInterval::const_vni_iterator VNIIterator; +unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { + assert(isPRegOption(vreg, option) && "Not a preg option."); - CoalesceMap coalescesFound; + const AllowedSet& allowedSet = getAllowedSet(vreg); + assert(option <= allowedSet.size() && "Option outside allowed set."); + return allowedSet[option - 1]; +} - // To find coalesces we need to iterate over the function looking for - // copy instructions. - for (MFIterator bbItr = mf->begin(), bbEnd = mf->end(); - bbItr != bbEnd; ++bbItr) { +std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { - const MachineBasicBlock *mbb = &*bbItr; + typedef std::vector<const LiveInterval*> LIVector; - for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end(); - iItr != iEnd; ++iItr) { + MachineRegisterInfo *mri = &mf->getRegInfo(); + const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); - const MachineInstr *instr = &*iItr; + std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem()); + PBQP::Graph &g = p->getGraph(); + RegSet pregs; - // If this isn't a copy then continue to the next instruction. - if (!instr->isCopy()) - continue; - - unsigned srcReg = instr->getOperand(1).getReg(); - unsigned dstReg = instr->getOperand(0).getReg(); + // Collect the set of preg intervals, record that they're used in the MF. + for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { + pregs.insert(itr->first); + mri->setPhysRegUsed(itr->first); + } + } - // If the registers are already the same our job is nice and easy. - if (dstReg == srcReg) - continue; + BitVector reservedRegs = tri->getReservedRegs(*mf); + + // Iterate over vregs. 
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); + vregItr != vregEnd; ++vregItr) { + unsigned vreg = *vregItr; + const TargetRegisterClass *trc = mri->getRegClass(vreg); + const LiveInterval *vregLI = &lis->getInterval(vreg); + + // Compute an initial allowed set for the current vreg. + typedef std::vector<unsigned> VRAllowed; + VRAllowed vrAllowed; + for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf), + aoEnd = trc->allocation_order_end(*mf); + aoItr != aoEnd; ++aoItr) { + unsigned preg = *aoItr; + if (!reservedRegs.test(preg)) { + vrAllowed.push_back(preg); + } + } - bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg), - dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg); + // Remove any physical registers which overlap. + for (RegSet::const_iterator pregItr = pregs.begin(), + pregEnd = pregs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; + const LiveInterval *pregLI = &lis->getInterval(preg); - // If both registers are physical then we can't coalesce. - if (srcRegIsPhysical && dstRegIsPhysical) + if (pregLI->empty()) { continue; + } - // If it's a copy that includes two virtual register but the source and - // destination classes differ then we can't coalesce. - if (!srcRegIsPhysical && !dstRegIsPhysical && - mri->getRegClass(srcReg) != mri->getRegClass(dstReg)) + if (!vregLI->overlaps(*pregLI)) { continue; - - // If one is physical and one is virtual, check that the physical is - // allocatable in the class of the virtual. - if (srcRegIsPhysical && !dstRegIsPhysical) { - const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg); - if (std::find(dstRegClass->allocation_order_begin(*mf), - dstRegClass->allocation_order_end(*mf), srcReg) == - dstRegClass->allocation_order_end(*mf)) - continue; } - if (!srcRegIsPhysical && dstRegIsPhysical) { - const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg); - if (std::find(srcRegClass->allocation_order_begin(*mf), - srcRegClass->allocation_order_end(*mf), dstReg) == - srcRegClass->allocation_order_end(*mf)) - continue; - } - - // If we've made it here we have a copy with compatible register classes. - // We can probably coalesce, but we need to consider overlap. - const LiveInterval *srcLI = &lis->getInterval(srcReg), - *dstLI = &lis->getInterval(dstReg); - if (srcLI->overlaps(*dstLI)) { - // Even in the case of an overlap we might still be able to coalesce, - // but we need to make sure that no definition of either range occurs - // while the other range is live. + // Remove the register from the allowed set. + VRAllowed::iterator eraseItr = + std::find(vrAllowed.begin(), vrAllowed.end(), preg); - // Otherwise start by assuming we're ok. - bool badDef = false; - - // Test all defs of the source range. - for (VNIIterator - vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end(); - vniItr != vniEnd; ++vniItr) { + if (eraseItr != vrAllowed.end()) { + vrAllowed.erase(eraseItr); + } - // If we find a poorly defined def we err on the side of caution. - if (!(*vniItr)->def.isValid()) { - badDef = true; - break; - } + // Also remove any aliases. + const unsigned *aliasItr = tri->getAliasSet(preg); + if (aliasItr != 0) { + for (; *aliasItr != 0; ++aliasItr) { + VRAllowed::iterator eraseItr = + std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr); - // If we find a def that kills the coalescing opportunity then - // record it and break from the loop. 
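The allowed-set computation above follows the register class allocation order, drops reserved registers, and then removes any physical register (and, via getAliasSet, its aliases) whose live interval overlaps the vreg. A self-contained sketch with the LLVM queries replaced by plain containers; computeAllowed, Reserved and Clobbered are illustrative names, and the alias expansion is assumed to be folded into Clobbered:

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    // Drop Reg from the allowed vector if present.
    static void dropReg(std::vector<unsigned> &Allowed, unsigned Reg) {
      Allowed.erase(std::remove(Allowed.begin(), Allowed.end(), Reg),
                    Allowed.end());
    }

    // AllocOrder: the register class allocation order.
    // Reserved:   registers reserved by the target.
    // Clobbered:  pregs (already expanded with their aliases) whose live
    //             interval overlaps the vreg being built.
    std::vector<unsigned> computeAllowed(const std::vector<unsigned> &AllocOrder,
                                         const std::set<unsigned> &Reserved,
                                         const std::set<unsigned> &Clobbered) {
      std::vector<unsigned> Allowed;
      for (size_t i = 0; i != AllocOrder.size(); ++i)
        if (!Reserved.count(AllocOrder[i]))      // skip reserved registers
          Allowed.push_back(AllocOrder[i]);
      for (std::set<unsigned>::const_iterator I = Clobbered.begin(),
           E = Clobbered.end(); I != E; ++I)
        dropReg(Allowed, *I);                    // overlapping preg or alias
      return Allowed;
    }

    int main() {
      std::vector<unsigned> Order;
      Order.push_back(1); Order.push_back(2); Order.push_back(3); Order.push_back(4);
      std::set<unsigned> Reserved;  Reserved.insert(4);   // e.g. a stack pointer
      std::set<unsigned> Clobbered; Clobbered.insert(2);  // live preg overlapping the vreg
      std::vector<unsigned> Allowed = computeAllowed(Order, Reserved, Clobbered);
      assert(Allowed.size() == 2 && Allowed[0] == 1 && Allowed[1] == 3);
      return 0;
    }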
- if (dstLI->liveAt((*vniItr)->def)) { - badDef = true; - break; + if (eraseItr != vrAllowed.end()) { + vrAllowed.erase(eraseItr); } } + } + } - // If we have a bad def give up, continue to the next instruction. - if (badDef) - continue; - - // Otherwise test definitions of the destination range. - for (VNIIterator - vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end(); - vniItr != vniEnd; ++vniItr) { + // Construct the node. + PBQP::Graph::NodeItr node = + g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); - // We want to make sure we skip the copy instruction itself. - if ((*vniItr)->getCopy() == instr) - continue; + // Record the mapping and allowed set in the problem. + p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end()); - if (!(*vniItr)->def.isValid()) { - badDef = true; - break; - } + PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? + vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min(); - if (srcLI->liveAt((*vniItr)->def)) { - badDef = true; - break; - } - } + addSpillCosts(g.getNodeCosts(node), spillCost); + } - // As before a bad def we give up and continue to the next instr. - if (badDef) - continue; + for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); + vr1Itr != vrEnd; ++vr1Itr) { + unsigned vr1 = *vr1Itr; + const LiveInterval &l1 = lis->getInterval(vr1); + const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); + + for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr); + vr2Itr != vrEnd; ++vr2Itr) { + unsigned vr2 = *vr2Itr; + const LiveInterval &l2 = lis->getInterval(vr2); + const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); + + assert(!l2.empty() && "Empty interval in vreg set?"); + if (l1.overlaps(l2)) { + PBQP::Graph::EdgeItr edge = + g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), + PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); + + addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri); } - - // If we make it to here then either the ranges didn't overlap, or they - // did, but none of their definitions would prevent us from coalescing. - // We're good to go with the coalesce. - - float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0; - - coalescesFound[RegPair(srcReg, dstReg)] = cBenefit; - coalescesFound[RegPair(dstReg, srcReg)] = cBenefit; } - } - return coalescesFound; + return p; } -void PBQPRegAlloc::findVRegIntervalsToAlloc() { - - // Iterate over all live ranges. - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { - - // Ignore physical ones. - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) - continue; - - LiveInterval *li = itr->second; - - // If this live interval is non-empty we will use pbqp to allocate it. - // Empty intervals we allocate in a simple post-processing stage in - // finalizeAlloc. 
- if (!li->empty()) { - vregIntervalsToAlloc.insert(li); - } - else { - emptyVRegIntervals.insert(li); - } - } +void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, + PBQP::PBQPNum spillCost) { + costVec[0] = spillCost; } -PBQP::Graph PBQPRegAlloc::constructPBQPProblem() { - - typedef std::vector<const LiveInterval*> LIVector; - typedef std::vector<unsigned> RegVector; +void PBQPBuilder::addInterferenceCosts( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + const TargetRegisterInfo *tri) { + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch."); - // This will store the physical intervals for easy reference. - LIVector physIntervals; + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; - // Start by clearing the old node <-> live interval mappings & allowed sets - li2Node.clear(); - node2LI.clear(); - allowedSets.clear(); - - // Populate physIntervals, update preg use: - for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); - itr != end; ++itr) { + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; - if (TargetRegisterInfo::isPhysicalRegister(itr->first)) { - physIntervals.push_back(itr->second); - mri->setPhysRegUsed(itr->second->reg); + if (tri->regsOverlap(preg1, preg2)) { + costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); + } } } +} - // Iterate over vreg intervals, construct live interval <-> node number - // mappings. - for (LiveIntervalSet::const_iterator - itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end(); - itr != end; ++itr) { - const LiveInterval *li = *itr; - - li2Node[li] = node2LI.size(); - node2LI.push_back(li); - } - - // Get the set of potential coalesces. - CoalesceMap coalesces; - - if (pbqpCoalescing) { - coalesces = findCoalesces(); - } - - // Construct a PBQP solver for this problem - PBQP::Graph problem; - problemNodes.resize(vregIntervalsToAlloc.size()); - - // Resize allowedSets container appropriately. - allowedSets.resize(vregIntervalsToAlloc.size()); - - BitVector ReservedRegs = tri->getReservedRegs(*mf); - - // Iterate over virtual register intervals to compute allowed sets... - for (unsigned node = 0; node < node2LI.size(); ++node) { - - // Grab pointers to the interval and its register class. - const LiveInterval *li = node2LI[node]; - const TargetRegisterClass *liRC = mri->getRegClass(li->reg); +std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build( + MachineFunction *mf, + const LiveIntervals *lis, + const MachineLoopInfo *loopInfo, + const RegSet &vregs) { - // Start by assuming all allocable registers in the class are allowed... - RegVector liAllowed; - TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf); - TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf); - for (TargetRegisterClass::iterator it = aob; it != aoe; ++it) - if (!ReservedRegs.test(*it)) - liAllowed.push_back(*it); + std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs); + PBQP::Graph &g = p->getGraph(); - // Eliminate the physical registers which overlap with this range, along - // with all their aliases. 
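For reference, a tiny standalone model of the node cost layout used by addSpillCosts and getPRegForOption above: index 0 carries the spill cost (the interval weight, or the smallest positive value when the weight is zero) and index k (k >= 1) stands for allowedSet[k-1]. Plain C++, no LLVM types; the register numbers and weight are made up:

    #include <cassert>
    #include <limits>
    #include <vector>

    int main() {
      // Allowed set for some vreg: {R1, R2, R3}; interval weight 4.5.
      std::vector<unsigned> Allowed;
      Allowed.push_back(1); Allowed.push_back(2); Allowed.push_back(3);
      double Weight = 4.5;

      // Cost vector has |Allowed| + 1 entries; entry 0 carries the spill cost.
      std::vector<double> Costs(Allowed.size() + 1, 0.0);
      Costs[0] = (Weight != 0.0) ? Weight
                                 : std::numeric_limits<double>::min();

      // Option k (k >= 1) selects Allowed[k - 1].
      unsigned Option = 2;
      assert(Allowed[Option - 1] == 2u);
      return 0;
    }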
- for (LIVector::iterator pItr = physIntervals.begin(), - pEnd = physIntervals.end(); pItr != pEnd; ++pItr) { + const TargetMachine &tm = mf->getTarget(); + CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo()); - if (!li->overlaps(**pItr)) - continue; + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. + for (MachineFunction::const_iterator mbbItr = mf->begin(), + mbbEnd = mf->end(); + mbbItr != mbbEnd; ++mbbItr) { + const MachineBasicBlock *mbb = &*mbbItr; - unsigned pReg = (*pItr)->reg; - - // If we get here then the live intervals overlap, but we're still ok - // if they're coalescable. - if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end()) - continue; + for (MachineBasicBlock::const_iterator miItr = mbb->begin(), + miEnd = mbb->end(); + miItr != miEnd; ++miItr) { + const MachineInstr *mi = &*miItr; - // If we get here then we have a genuine exclusion. + if (!cp.setRegisters(mi)) { + continue; // Not coalescable. + } - // Remove the overlapping reg... - RegVector::iterator eraseItr = - std::find(liAllowed.begin(), liAllowed.end(), pReg); + if (cp.getSrcReg() == cp.getDstReg()) { + continue; // Already coalesced. + } - if (eraseItr != liAllowed.end()) - liAllowed.erase(eraseItr); + unsigned dst = cp.getDstReg(), + src = cp.getSrcReg(); - const unsigned *aliasItr = tri->getAliasSet(pReg); + const float copyFactor = 0.5; // Cost of copy relative to load. Current + // value plucked randomly out of the air. + + PBQP::PBQPNum cBenefit = + copyFactor * LiveIntervals::getSpillWeight(false, true, + loopInfo->getLoopDepth(mbb)); - if (aliasItr != 0) { - // ...and its aliases. - for (; *aliasItr != 0; ++aliasItr) { - RegVector::iterator eraseItr = - std::find(liAllowed.begin(), liAllowed.end(), *aliasItr); + if (cp.isPhys()) { + if (!lis->isAllocatable(dst)) { + continue; + } - if (eraseItr != liAllowed.end()) { - liAllowed.erase(eraseItr); + const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); + unsigned pregOpt = 0; + while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { + ++pregOpt; + } + if (pregOpt < allowed.size()) { + ++pregOpt; // +1 to account for spill option. + PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); + } + } else { + const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); + const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); + PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); + if (edge == g.edgesEnd()) { + edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, + allowed2->size() + 1, + 0)); + } else { + if (g.getEdgeNode1(edge) == node2) { + std::swap(node1, node2); + std::swap(allowed1, allowed2); } } + + addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2, + cBenefit); } } + } - // Copy the allowed set into a member vector for use when constructing cost - // vectors & matrices, and mapping PBQP solutions back to assignments. - allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end()); + return p; +} - // Set the spill cost to the interval weight, or epsilon if the - // interval weight is zero - PBQP::PBQPNum spillCost = (li->weight != 0.0) ? 
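A rough sense of scale for the coalescing benefit computed above, assuming LiveIntervals::getSpillWeight(isDef, isUse, loopDepth) behaves like (isDef + isUse) * 10^loopDepth (the shape of the contemporaneous implementation; spillWeight below is an assumed stand-in, and copyFactor is the 0.5 constant from the patch):

    #include <cmath>
    #include <cstdio>

    // Assumed shape of LiveIntervals::getSpillWeight.
    static float spillWeight(bool IsDef, bool IsUse, unsigned LoopDepth) {
      return (IsDef + IsUse) * std::pow(10.0F, (float)LoopDepth);
    }

    int main() {
      const float CopyFactor = 0.5F;   // cost of a copy relative to a load
      for (unsigned Depth = 0; Depth <= 3; ++Depth)
        std::printf("loop depth %u -> benefit %g\n",
                    Depth, CopyFactor * spillWeight(false, true, Depth));
      // Prints 0.5, 5, 50, 500: copies in deeper loops earn a much larger
      // negative cost (benefit) on the matching register option.
      return 0;
    }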
- li->weight : std::numeric_limits<PBQP::PBQPNum>::min(); +void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, + unsigned pregOption, + PBQP::PBQPNum benefit) { + costVec[pregOption] += -benefit; +} - // Build a cost vector for this interval. - problemNodes[node] = - problem.addNode( - buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); +void PBQPBuilderWithCoalescing::addVirtRegCoalesce( + PBQP::Matrix &costMat, + const PBQPRAProblem::AllowedSet &vr1Allowed, + const PBQPRAProblem::AllowedSet &vr2Allowed, + PBQP::PBQPNum benefit) { - } + assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); + assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); + for (unsigned i = 0; i != vr1Allowed.size(); ++i) { + unsigned preg1 = vr1Allowed[i]; + for (unsigned j = 0; j != vr2Allowed.size(); ++j) { + unsigned preg2 = vr2Allowed[j]; + + if (preg1 == preg2) { + costMat[i + 1][j + 1] += -benefit; + } + } + } +} - // Now add the cost matrices... - for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) { - const LiveInterval *li = node2LI[node1]; - // Test for live range overlaps and insert interference matrices. - for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) { - const LiveInterval *li2 = node2LI[node2]; +void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + //au.addRequiredID(SplitCriticalEdgesID); + au.addRequired<RegisterCoalescer>(); + au.addRequired<CalculateSpillWeights>(); + au.addRequired<LiveStacks>(); + au.addPreserved<LiveStacks>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + if (pbqpPreSplitting) + au.addRequired<LoopSplitter>(); + au.addRequired<VirtRegMap>(); + au.addRequired<RenderMachineFunction>(); + MachineFunctionPass::getAnalysisUsage(au); +} - CoalesceMap::const_iterator cmItr = - coalesces.find(RegPair(li->reg, li2->reg)); +void RegAllocPBQP::findVRegIntervalsToAlloc() { - PBQP::Matrix *m = 0; + // Iterate over all live ranges. + for (LiveIntervals::iterator itr = lis->begin(), end = lis->end(); + itr != end; ++itr) { - if (cmItr != coalesces.end()) { - m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], - cmItr->second); - } - else if (li->overlaps(*li2)) { - m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]); - } + // Ignore physical ones. + if (TargetRegisterInfo::isPhysicalRegister(itr->first)) + continue; - if (m != 0) { - problem.addEdge(problemNodes[node1], - problemNodes[node2], - *m); + LiveInterval *li = itr->second; - delete m; - } + // If this live interval is non-empty we will use pbqp to allocate it. + // Empty intervals we allocate in a simple post-processing stage in + // finalizeAlloc. + if (!li->empty()) { + vregsToAlloc.insert(li->reg); + } else { + emptyIntervalVRegs.insert(li->reg); } } - - assert(problem.getNumNodes() == allowedSets.size()); -/* - std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " - << problem.getNumEdges() << " edges.\n"; - - problem.printDot(std::cerr); -*/ - // We're done, PBQP problem constructed - return it. 
- return problem; } -void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, +void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, MachineRegisterInfo* mri) { int stackSlot = vrm->getStackSlot(spilled->reg); - if (stackSlot == VirtRegMap::NO_STACK_SLOT) + if (stackSlot == VirtRegMap::NO_STACK_SLOT) { return; + } const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); VNInfo *vni; - if (stackInterval.getNumValNums() != 0) + if (stackInterval.getNumValNums() != 0) { vni = stackInterval.getValNumInfo(0); - else + } else { vni = stackInterval.getNextValue( - SlotIndex(), 0, false, lss->getVNInfoAllocator()); + SlotIndex(), 0, lss->getVNInfoAllocator()); + } LiveInterval &rhsInterval = lis->getInterval(spilled->reg); stackInterval.MergeRangesInAsValue(rhsInterval, vni); } -bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { - +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, + const PBQP::Solution &solution) { // Set to true if we have any spills bool anotherRoundNeeded = false; // Clear the existing allocation. vrm->clearAllVirt(); - // Iterate over the nodes mapping the PBQP solution to a register assignment. - for (unsigned node = 0; node < node2LI.size(); ++node) { - unsigned virtReg = node2LI[node]->reg, - allocSelection = solution.getSelection(problemNodes[node]); - - - // If the PBQP solution is non-zero it's a physical register... - if (allocSelection != 0) { - // Get the physical reg, subtracting 1 to account for the spill option. - unsigned physReg = allowedSets[node][allocSelection - 1]; - - DEBUG(dbgs() << "VREG " << virtReg << " -> " - << tri->getName(physReg) << "\n"); - - assert(physReg != 0); - - // Add to the virt reg map and update the used phys regs. - vrm->assignVirt2Phys(virtReg, physReg); - } - // ...Otherwise it's a spill. - else { - - // Make sure we ignore this virtual reg on the next round - // of allocation - vregIntervalsToAlloc.erase(&lis->getInterval(virtReg)); - - // Insert spill ranges for this live range - const LiveInterval *spillInterval = node2LI[node]; - double oldSpillWeight = spillInterval->weight; + const PBQP::Graph &g = problem.getGraph(); + // Iterate over the nodes mapping the PBQP solution to a register + // assignment. 
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + node != nodeEnd; ++node) { + unsigned vreg = problem.getVRegForNode(node); + unsigned alloc = solution.getSelection(node); + + if (problem.isPRegOption(vreg, alloc)) { + unsigned preg = problem.getPRegForOption(vreg, alloc); + DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n"); + assert(preg != 0 && "Invalid preg selected."); + vrm->assignVirt2Phys(vreg, preg); + } else if (problem.isSpillOption(vreg, alloc)) { + vregsToAlloc.erase(vreg); + const LiveInterval* spillInterval = &lis->getInterval(vreg); + double oldWeight = spillInterval->weight; SmallVector<LiveInterval*, 8> spillIs; rmf->rememberUseDefs(spillInterval); std::vector<LiveInterval*> newSpills = @@ -768,42 +541,42 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { addStackInterval(spillInterval, mri); rmf->rememberSpills(spillInterval, newSpills); - (void) oldSpillWeight; - DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: " - << oldSpillWeight << ", New vregs: "); + (void) oldWeight; + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " + << oldWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. for (std::vector<LiveInterval*>::const_iterator itr = newSpills.begin(), end = newSpills.end(); itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << (*itr)->reg << " "); - - vregIntervalsToAlloc.insert(*itr); + vregsToAlloc.insert((*itr)->reg); } DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); + } else { + assert(false && "Unknown allocation option."); } } return !anotherRoundNeeded; } -void PBQPRegAlloc::finalizeAlloc() const { + +void RegAllocPBQP::finalizeAlloc() const { typedef LiveIntervals::iterator LIIterator; typedef LiveInterval::Ranges::const_iterator LRIterator; // First allocate registers for the empty intervals. - for (LiveIntervalSet::const_iterator - itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end(); + for (RegSet::const_iterator + itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); itr != end; ++itr) { - LiveInterval *li = *itr; + LiveInterval *li = &lis->getInterval(*itr); unsigned physReg = vrm->getRegAllocPref(li->reg); @@ -828,11 +601,9 @@ void PBQPRegAlloc::finalizeAlloc() const { // Get the physical register for this interval if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { reg = li->reg; - } - else if (vrm->isAssignedReg(li->reg)) { + } else if (vrm->isAssignedReg(li->reg)) { reg = vrm->getPhys(li->reg); - } - else { + } else { // Ranges which are assigned a stack slot only are ignored. continue; } @@ -849,7 +620,7 @@ void PBQPRegAlloc::finalizeAlloc() const { // Find the set of basic blocks which this range is live into... if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { // And add the physreg for this interval to their live-in sets. 
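A minimal model of the selection decoding in mapPBQPToRegAlloc above: selection 0 is the spill option, any other selection k picks allowedSet[k-1], and anything else trips an assertion. The decode function and Assignment enum below are illustrative names, not LLVM API:

    #include <cassert>
    #include <vector>

    enum Assignment { AssignedPhys, Spilled };

    Assignment decode(unsigned Selection, const std::vector<unsigned> &Allowed,
                      unsigned &PhysRegOut) {
      if (Selection == 0)
        return Spilled;                        // spill option
      assert(Selection <= Allowed.size() && "Unknown allocation option.");
      PhysRegOut = Allowed[Selection - 1];     // physical-register option
      return AssignedPhys;
    }

    int main() {
      std::vector<unsigned> Allowed;
      Allowed.push_back(7); Allowed.push_back(9);
      unsigned PReg = 0;
      assert(decode(2, Allowed, PReg) == AssignedPhys && PReg == 9);
      assert(decode(0, Allowed, PReg) == Spilled);
      return 0;
    }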
- for (unsigned i = 0; i < liveInMBBs.size(); ++i) { + for (unsigned i = 0; i != liveInMBBs.size(); ++i) { if (liveInMBBs[i] != entryMBB) { if (!liveInMBBs[i]->isLiveIn(reg)) { liveInMBBs[i]->addLiveIn(reg); @@ -863,7 +634,7 @@ void PBQPRegAlloc::finalizeAlloc() const { } -bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { mf = &MF; tm = &mf->getTarget(); @@ -894,7 +665,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { findVRegIntervalsToAlloc(); // If there are non-empty intervals allocate them using pbqp. - if (!vregIntervalsToAlloc.empty()) { + if (!vregsToAlloc.empty()) { bool pbqpAllocComplete = false; unsigned round = 0; @@ -902,11 +673,13 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { while (!pbqpAllocComplete) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); - PBQP::Graph problem = constructPBQPProblem(); + std::auto_ptr<PBQPRAProblem> problem = + builder->build(mf, lis, loopInfo, vregsToAlloc); PBQP::Solution solution = - PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem); + PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( + problem->getGraph()); - pbqpAllocComplete = mapPBQPToRegAlloc(solution); + pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); ++round; } @@ -917,12 +690,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { rmf->renderMachineFunction("After PBQP register allocation.", vrm); - vregIntervalsToAlloc.clear(); - emptyVRegIntervals.clear(); - li2Node.clear(); - node2LI.clear(); - allowedSets.clear(); - problemNodes.clear(); + vregsToAlloc.clear(); + emptyIntervalVRegs.clear(); DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); @@ -934,9 +703,18 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { return true; } -FunctionPass* llvm::createPBQPRegisterAllocator() { - return new PBQPRegAlloc(); +FunctionPass* llvm::createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder> builder) { + return new RegAllocPBQP(builder); } +FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { + if (pbqpCoalescing) { + return createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing())); + } // else + return createPBQPRegisterAllocator( + std::auto_ptr<PBQPBuilder>(new PBQPBuilder())); +} #undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 02b5539f0f4f..407559a211a0 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -24,7 +24,8 @@ using namespace llvm; // Register the RegisterCoalescer interface, providing a nice name to refer to. 
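The driver in runOnMachineFunction above boils down to: build the problem for the current vreg set, solve it, map the solution back, and repeat while spilling introduced new intervals. A toy sketch of that fixed-point loop; solveOnce is a made-up stand-in for the build/solve/map cycle:

    #include <cstdio>
    #include <set>

    // Stand-in for one build/solve/map cycle: returns true when no vreg had
    // to be spilled (i.e. no new intervals were added to the worklist).
    static bool solveOnce(std::set<unsigned> &VRegsToAlloc) {
      if (VRegsToAlloc.count(2)) {        // pretend vreg 2 doesn't fit this round
        VRegsToAlloc.erase(2);            // it leaves the worklist...
        VRegsToAlloc.insert(20);          // ...and its split ranges re-enter it
        VRegsToAlloc.insert(21);
        return false;
      }
      return true;                        // every node picked a register option
    }

    int main() {
      std::set<unsigned> VRegsToAlloc;
      VRegsToAlloc.insert(1);
      VRegsToAlloc.insert(2);
      unsigned Round = 0;
      bool Complete = false;
      while (!Complete) {                 // mirrors the per-round allocation loop
        std::printf("PBQP Regalloc round %u\n", Round);
        Complete = solveOnce(VRegsToAlloc);
        ++Round;
      }
      return 0;
    }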
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer"); +INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", + SimpleRegisterCoalescing) char RegisterCoalescer::ID = 0; // RegisterCoalescer destructor: DO NOT move this to the header file diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 93426eecbbc1..cbfd5a23d63d 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -30,9 +30,14 @@ using namespace llvm; char RenderMachineFunction::ID = 0; -INITIALIZE_PASS(RenderMachineFunction, "rendermf", +INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", "Render machine functions (and related info) to HTML pages", - false, false); + false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", + "Render machine functions (and related info) to HTML pages", + false, false) static cl::opt<std::string> outputFileSuffix("rmf-file-suffix", @@ -458,14 +463,9 @@ namespace llvm { liItr != liEnd; ++liItr) { LiveInterval *li = liItr->second; - const TargetRegisterClass *liTRC; - if (TargetRegisterInfo::isPhysicalRegister(li->reg)) continue; - liTRC = mri->getRegClass(li->reg); - - // For all ranges in the current interal. for (LiveInterval::iterator lrItr = li->begin(), lrEnd = li->end(); diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h index 8d56a8292ac5..85719923c0c6 100644 --- a/lib/CodeGen/RenderMachineFunction.h +++ b/lib/CodeGen/RenderMachineFunction.h @@ -202,7 +202,9 @@ namespace llvm { public: static char ID; - RenderMachineFunction() : MachineFunctionPass(ID) {} + RenderMachineFunction() : MachineFunctionPass(ID) { + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &au) const; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 7d39dc496afe..3388889c9e91 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// getInstrDesc helper to handle SDNodes. +const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); +} + /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { @@ -68,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. -void SUnit::addPred(const SDep &D) { +bool SUnit::addPred(const SDep &D) { // If this node already has this depenence, don't add a redundant one. for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) - return; + return false; // Now add a corresponding succ to N. 
SDep P = D; P.setSUnit(this); @@ -99,6 +106,7 @@ void SUnit::addPred(const SDep &D) { this->setDepthDirty(); N->setHeightDirty(); } + return true; } /// removePred - This removes the specified edge as a pred of the current @@ -278,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; dbgs() << " Depth : " << Depth << "\n"; dbgs() << " Height : " << Height << "\n"; @@ -492,7 +501,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { /// all nodes affected by the edge insertion. These nodes will later get new /// topological indexes by means of the Shift method. void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, - bool& HasLoop) { + bool &HasLoop) { std::vector<const SUnit*> WorkList; WorkList.reserve(SUnits.size()); diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 0a2fb3796a42..6b7a8c6491bd 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -57,7 +57,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, assert(I->getReg() && "Unknown physical register!"); unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) .addReg(I->getReg()); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ea93dd5c6663..f17023eabb72 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -16,6 +16,7 @@ #include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -32,9 +33,9 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()), - Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { - MFI = mf.getFrameInfo(); + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), + InstrItins(mf.getTarget().getInstrItineraryData()), + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { DbgValueVec.clear(); } @@ -78,12 +79,12 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } while (1); } -/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject +/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObject(const Value *V) { // First just call Value::getUnderlyingObject to let it do what it does. do { - V = V->getUnderlyingObject(); + V = GetUnderlyingObject(V); // If it found an inttoptr, use special code to continue climing. 
if (Operator::getOpcode(V) != Instruction::IntToPtr) break; @@ -141,6 +142,46 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { } } +/// AddSchedBarrierDeps - Add dependencies from instructions in the current +/// list of instructions being scheduled to scheduling barrier by adding +/// the exit SU to the register defs and use list. This is because we want to +/// make sure instructions which define registers that are either used by +/// the terminator or are live-out are properly scheduled. This is +/// especially important when the definition latency of the return value(s) +/// are too high to be hidden by the branch or when the liveout registers +/// used by instructions in the fallthrough block. +void ScheduleDAGInstrs::AddSchedBarrierDeps() { + MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; + ExitSU.setInstr(ExitMI); + bool AllDepKnown = ExitMI && + (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + if (ExitMI && AllDepKnown) { + // If it's a call or a barrier, add dependencies on the defs and uses of + // instruction. + for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = ExitMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); + Uses[Reg].push_back(&ExitSU); + } + } else { + // For others, e.g. fallthrough, conditional branch, assume the exit + // uses all the registers that are livein to the successor blocks. + SmallSet<unsigned, 8> Seen; + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + if (Seen.insert(Reg)) + Uses[Reg].push_back(&ExitSU); + } + } +} + void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We'll be allocating one SUnit for each instruction, plus one for // the region exit node. @@ -175,6 +216,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // without emitting the info from the previous call. DbgValueVec.clear(); + // Model data dependencies between instructions being scheduled and the + // ExitSU. + AddSchedBarrierDeps(); + // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; MII != MIE; --MII) { @@ -194,6 +239,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); + SU->isCall = TID.isCall(); + SU->isCommutable = TID.isCommutable(); // Assign the Latency field of SU using target-provided information. if (UnitLatencies) @@ -228,6 +275,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { unsigned AOLatency = (Kind == SDep::Anti) ? 
0 : 1; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) @@ -237,6 +286,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { std::vector<SUnit *> &DefList = Defs[*Alias]; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(*Alias))) @@ -258,12 +309,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // TODO: Perhaps we should get rid of // SpecialAddressLatency and just move this into // adjustSchedDependency for the targets that care about it. - if (SpecialAddressLatency != 0 && !UnitLatencies) { + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { MachineInstr *UseMI = UseSU->getInstr(); const TargetInstrDesc &UseTID = UseMI->getDesc(); int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); - if ((UseTID.mayLoad() || UseTID.mayStore()) && + if (RegUseIndex >= 0 && + (UseTID.mayLoad() || UseTID.mayStore()) && (unsigned)RegUseIndex < UseTID.getNumOperands() && UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; @@ -357,7 +410,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // produce more precise dependence information. #define STORE_LOAD_LATENCY 1 unsigned TrueMemOrderLatency = 0; - if (TID.isCall() || TID.hasUnmodeledSideEffects() || + if (TID.isCall() || MI->hasUnmodeledSideEffects() || (MI->hasVolatileMemoryRef() && (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { // Be conservative with these and add dependencies on all memory @@ -446,6 +499,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Treat all other stores conservatively. goto new_alias_chain; } + + if (!ExitSU.isPred(SU)) + // Push store's up a bit to avoid them getting in between cmp + // and branches. + ExitSU.addPred(SDep(SU, SDep::Order, 0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); } else if (TID.mayLoad()) { bool MayAlias = true; TrueMemOrderLatency = 0; @@ -498,23 +559,22 @@ void ScheduleDAGInstrs::FinishBlock() { } void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. - SU->Latency = - InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); + if (!InstrItins || InstrItins->isEmpty()) { + SU->Latency = 1; - // Simplistic target-independent heuristic: assume that loads take - // extra time. - if (InstrItins.isEmpty()) + // Simplistic target-independent heuristic: assume that loads take + // extra time. if (SU->getInstr()->getDesc().mayLoad()) SU->Latency += 2; + } else { + SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); + } } void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) + if (!InstrItins || InstrItins->isEmpty()) return; // For a data dependency with a known register... 
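The no-itinerary fallback in ComputeLatency above is small enough to tabulate: every node costs one cycle, plus two more if it may load. A standalone check; defaultLatency is an illustrative name, not the LLVM function:

    #include <cassert>

    // Stand-in for the no-itinerary path of ScheduleDAGInstrs::ComputeLatency.
    unsigned defaultLatency(bool MayLoad) {
      unsigned Latency = 1;    // every instruction costs at least one cycle
      if (MayLoad)
        Latency += 2;          // simplistic heuristic: loads take extra time
      return Latency;
    }

    int main() {
      assert(defaultLatency(false) == 1);  // e.g. an ALU op
      assert(defaultLatency(true)  == 3);  // e.g. a load
      return 0;
    }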
@@ -528,14 +588,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { - int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), - DefIdx); - if (DefCycle >= 0) { - MachineInstr *UseMI = Use->getInstr(); - const unsigned UseClass = UseMI->getDesc().getSchedClass(); - - // For all uses of the register, calculate the maxmimum latency - int Latency = -1; + const MachineOperand &MO = DefMI->getOperand(DefIdx); + if (MO.isReg() && MO.isImplicit() && + DefIdx >= (int)DefMI->getDesc().getNumOperands()) { + // This is an implicit def, getOperandLatency() won't return the correct + // latency. e.g. + // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def> + // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ... + // What we want is to compute latency between def of %D6/%D7 and use of + // %Q3 instead. + DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + } + MachineInstr *UseMI = Use->getInstr(); + // For all uses of the register, calculate the maxmimum latency + int Latency = -1; + if (UseMI) { for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = UseMI->getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -544,15 +611,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, if (MOReg != Reg) continue; - int UseCycle = InstrItins.getOperandCycle(UseClass, i); - if (UseCycle >= 0) - Latency = std::max(Latency, DefCycle - UseCycle + 1); + int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, + UseMI, i); + Latency = std::max(Latency, UseCycle); } - - // If we found a latency, then replace the existing dependence latency. - if (Latency >= 0) - dep.setLatency(Latency); + } else { + // UseMI is null, then it must be a scheduling barrier. + if (!InstrItins || InstrItins->isEmpty()) + return; + unsigned DefClass = DefMI->getDesc().getSchedClass(); + Latency = InstrItins->getOperandCycle(DefClass, DefIdx); } + + // If we found a latency, then replace the existing dependence latency. + if (Latency >= 0) + dep.setLatency(Latency); } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c8f543f7146d..c878287d9c8c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -101,6 +101,7 @@ namespace llvm { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; const MachineFrameInfo *MFI; + const InstrItineraryData *InstrItins; /// Defs, Uses - Remember where defs and uses of each physical register /// are as we iterate upward through the instructions. This is allocated @@ -163,6 +164,15 @@ namespace llvm { /// input. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// AddSchedBarrierDeps - Add dependencies from instructions in the current + /// list of instructions being scheduled to scheduling barrier. We want to + /// make sure instructions which define registers that are either used by + /// the terminator or are live-out are properly scheduled. This is + /// especially important when the definition latency of the return value(s) + /// are too high to be hidden by the branch or when the liveout registers + /// used by instructions in the fallthrough block. + void AddSchedBarrierDeps(); + /// ComputeLatency - Compute node latency. 
/// virtual void ComputeLatency(SUnit *SU); diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index cbde2b01eeaf..e6d7ded8a784 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -1,4 +1,4 @@ -//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===// +//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===// // // The LLVM Compiler Infrastructure // @@ -7,56 +7,81 @@ // //===----------------------------------------------------------------------===// // -// This implements a hazard recognizer using the instructions itineraries -// defined for the current target. +// This file implements the ScoreboardHazardRecognizer class, which +// encapsultes hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" -#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; -PostRAHazardRecognizer:: -PostRAHazardRecognizer(const InstrItineraryData &LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) { +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + +ScoreboardHazardRecognizer:: +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. 
unsigned ScoreboardDepth = 1; - if (!ItinData.isEmpty()) { + if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + for (unsigned idx = 0; ; ++idx) { - if (ItinData.isEndMarker(idx)) + if (ItinData->isEndMarker(idx)) break; - const InstrStage *IS = ItinData.beginStage(idx); - const InstrStage *E = ItinData.endStage(idx); + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } - ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + // Find the next power-of-2 >= ItinDepth + while (ItinDepth > ScoreboardDepth) { + ScoreboardDepth *= 2; + } } + MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); RequiredScoreboard.reset(ScoreboardDepth); - DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " << ScoreboardDepth << '\n'); } -void PostRAHazardRecognizer::Reset() { +void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; RequiredScoreboard.reset(); ReservedScoreboard.reset(); } -void PostRAHazardRecognizer::ScoreBoard::dump() const { +void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; unsigned last = Depth - 1; @@ -72,24 +97,46 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const { } } +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + ScheduleHazardRecognizer::HazardType -PostRAHazardRecognizer::getHazardType(SUnit *SU) { - if (ItinData.isEmpty()) +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (!ItinData || ItinData->isEmpty()) return NoHazard; - unsigned cycle = 0; + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. 
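A worked example of the scoreboard sizing above: the depth of one itinerary class is the furthest cycle any of its stages reaches (start cycle plus occupied cycles), and the scoreboard is sized to the next power of two at or above the deepest class. Stage, itinDepth and scoreboardDepth below are standalone stand-ins for the InstrStage/InstrItineraryData queries:

    #include <cassert>
    #include <vector>

    struct Stage { unsigned Cycles; unsigned NextCycles; };

    unsigned itinDepth(const std::vector<Stage> &Stages) {
      unsigned CurCycle = 0, Depth = 0;
      for (size_t i = 0; i != Stages.size(); ++i) {
        unsigned StageDepth = CurCycle + Stages[i].Cycles;
        if (Depth < StageDepth) Depth = StageDepth;
        CurCycle += Stages[i].NextCycles;
      }
      return Depth;
    }

    unsigned scoreboardDepth(unsigned MaxItinDepth) {
      unsigned D = 1;                   // always at least one cycle deep
      while (MaxItinDepth > D) D *= 2;  // round up to a power of two
      return D;
    }

    int main() {
      // Three stages: issue (1 cycle), execute (2 cycles, starting next cycle),
      // writeback (2 cycles, starting two cycles after that).
      Stage S[] = { {1, 1}, {2, 2}, {2, 0} };
      std::vector<Stage> Stages(S, S + 3);
      assert(itinDepth(Stages) == 5);    // deepest stage ends at cycle 5
      assert(scoreboardDepth(5) == 8);   // rounded up to the next power of 2
      return 0;
    }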
+ break; + } unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { @@ -97,18 +144,18 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; + freeUnits &= ~ReservedScoreboard[StageCycle]; // FALLTHROUGH case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; + freeUnits &= ~RequiredScoreboard[StageCycle]; break; } if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); + DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -120,17 +167,24 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { return NoHazard; } -void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { - if (ItinData.isEmpty()) +void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + if (!ItinData || ItinData->isEmpty()) return; - unsigned cycle = 0; - // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(TID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. 
@@ -174,7 +228,16 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { DEBUG(RequiredScoreboard.dump()); } -void PostRAHazardRecognizer::AdvanceCycle() { +void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } + +void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; + ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; + ReservedScoreboard.recede(); + RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; + RequiredScoreboard.recede(); +} diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 799988a4c862..15932c03a190 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -21,5 +21,3 @@ add_llvm_library(LLVMSelectionDAG TargetLowering.cpp TargetSelectionDAGInfo.cpp ) - -target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c9c4d91e9736..90356021f602 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); namespace { static cl::opt<bool> @@ -185,7 +185,7 @@ namespace { SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); - SDValue visitBIT_CONVERT(SDNode *N); + SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); @@ -229,12 +229,13 @@ namespace { SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -248,16 +249,19 @@ namespace { bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, SDValue Ptr2, int64_t Size2, const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2) const; + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment) const; + unsigned &SrcValueAlignment, + const MDNode *&TBAAInfo) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -270,15 +274,15 @@ namespace { /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); - + SelectionDAG &getDAG() const { return DAG; } - + /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy() { return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); } - + /// isTypeLegal - This method returns true if we are running before type /// legalization or if the specified VT is legal. bool isTypeLegal(const EVT &VT) { @@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.2 "; + DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); @@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, PVT, dl, + return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } @@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; return DAG.getNode(ExtOpc, dl, PVT, Op); - } + } } if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) @@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo(), MemVT, LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); @@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; + DEBUG(dbgs() << "\nReplacing.3 "; N->dump(&DAG); dbgs() << "\nWith: "; RV.getNode()->dump(&DAG); @@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); - case ISD::BIT_CONVERT: return visitBIT_CONVERT(N); + case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); case ISD::FSUB: return visitFSUB(N); @@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've change things around then replace token factor. @@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(0).getOperand(1), N0.getOperand(1))); + if (N1.getOpcode() == ISD::AND) { + SDValue AndOp0 = N1.getOperand(0); + ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); + unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + // and similar xforms where the inner op is either ~0 or 0. + if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + DebugLoc DL = N->getDebugLoc(); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + return SDValue(); } @@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { if (N->hasNUsesOfValue(0, 1)) return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // canonicalize constant to RHS. if (N0C && !N1C) @@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Flag)); + N->getDebugLoc(), MVT::Glue)); } return SDValue(); @@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. 
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector<SDValue> Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + &Ops[0], Ops.size()); + } + return SDValue(); +} + SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return DAG.getConstant(0, N->getValueType(0)); + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); @@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) if (N0C && N0C->isAllOnesValue()) return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); // fold (A+B)-A -> B if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) return N0.getOperand(1); @@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. 
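Aside: the widened-multiply rewrite added to visitMULHS/visitMULHU above computes the high half of an N-bit product by extending both operands to 2N bits, multiplying once, and shifting the result right by N. A host-side sketch for N = 16 (not the LLVM code itself; assumes the usual two's-complement narrowing conversions):

#include <cassert>
#include <cstdint>

// mulhs for i16: sign-extend to i32, multiply, logical-shift right by 16,
// truncate back to i16 -- the same shape as the new DAG expansion.
int16_t mulhs16(int16_t a, int16_t b) {
  int32_t wide = int32_t(a) * int32_t(b);
  return int16_t(uint32_t(wide) >> 16);
}

int main() {
  assert(mulhs16(0x4000, 0x4000) == 0x1000);  // 16384 * 16384 = 2^28
  assert(mulhs16(-1, 1) == -1);               // high half of -1 is all ones
  assert(mulhs16(300, 300) == 1);             // 90000 = 0x15F90, high half is 1
  return 0;
}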
+ if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + return SDValue(); } @@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); if (Res.getNode()) return Res; + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, getShiftAmountTy())); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. 
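Aside: the SMUL_LOHI/UMUL_LOHI expansion above derives both result halves from a single widened multiply. An illustrative sketch for the unsigned 32-bit case:

#include <cassert>
#include <cstdint>
#include <utility>

// umul_lohi for i32: zero-extend to i64, multiply once; the low half is a
// truncate, the high half a truncate of the product shifted right by 32.
std::pair<uint32_t, uint32_t> umul_lohi32(uint32_t a, uint32_t b) {
  uint64_t wide = uint64_t(a) * uint64_t(b);
  return { uint32_t(wide), uint32_t(wide >> 32) };   // {lo, hi}
}

int main() {
  auto [lo, hi] = umul_lohi32(0xFFFFFFFFu, 0xFFFFFFFFu);
  assert(lo == 1u);            // (2^32 - 1)^2 = 2^64 - 2^33 + 1
  assert(hi == 0xFFFFFFFEu);
  return 0;
}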
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + return SDValue(); } @@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); - Mask.trunc(N0Op0.getValueSizeInBits()); + Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), N0.getValueType(), N0Op0); @@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); @@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } - + // Do not change the width of a volatile load. // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). @@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } AddToWorkList(NewPtr.getNode()); - + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), Alignment); AddToWorkList(N); @@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 - if (N0 == N1) { - if (!VT.isVector()) { - return DAG.getConstant(0, VT); - } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ - // Produce a vector of zeros. 
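Aside: the visitAND rewrites above shrink the load feeding an AND with a low-bit mask into a zero-extending load of just the needed bits; the scalar identity that makes this sound is that masking with a low mask is a truncate followed by a zero-extend:

#include <cassert>
#include <cstdint>

// Masking with a low-bit mask is a truncate + zero-extend in disguise,
// which is exactly what a narrower zextload produces.
int main() {
  for (uint32_t x : {0u, 0x12345678u, 0xFFFFFFFFu})
    assert((x & 0xFFFFu) == uint32_t(uint16_t(x)));
  return 0;
}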
- SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector<SDValue> Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &Ops[0], Ops.size()); - } - } + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { LHS->getOperand(1), N->getOperand(1)); // Create the new shift. - SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), + SDValue NewShift = DAG.getNode(N->getOpcode(), + LHS->getOperand(0).getDebugLoc(), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. @@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, DAG.getNode(ISD::TRUNCATE, @@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } + + // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) + // For this to be valid, the second form must not preserve any of the bits + // that are shifted out by the inner shift in the first form. This means + // the outer shift size must be >= the number of bits added by the ext. + // As a corollary, we don't care what kind of ext it is. + if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && + N0.getOperand(0).getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, + DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + N0.getOperand(0)->getOperand(0)), + DAG.getConstant(c1 + c2, N1.getValueType())); + } + } + // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or // (srl (and x, (shl -1, c1)), (sub c1, c2)) if (N1C && N0.getOpcode() == ISD::SRL && @@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); + EVT::getIntegerVT(*DAG.getContext(), + OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. 
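Aside: the visitSHL change above tightens the guard from "c1 + c2 > width" to ">=". When the two shift amounts sum to the full bit width, every bit has already been shifted out, so the node must fold to the constant 0 rather than to a single shift (a shift by the full width would be out of range for the type). Concretely:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x80000001u;
  // c1 + c2 < 32: the two shifts fold into one shift by c1 + c2.
  assert(((x << 3) << 4) == (x << 7));
  // c1 + c2 == 32: all bits are gone, so the correct result is 0,
  // which is why the combine now tests c1 + c2 >= OpSizeInBits.
  assert(((x << 16) << 16) == 0u);
  return 0;
}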
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } } + // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { + EVT LargeVT = N0.getOperand(0).getValueType(); + ConstantSDNode *LargeShiftAmt = + cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); + + if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == + LargeShiftAmt->getZExtValue()) { + SDValue Amt = + DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), + getShiftAmountTy()); + SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + N0.getOperand(0).getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + } + } + // Simplify, based on bits shifted out of the LHS. if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - if (c1 + c2 > OpSizeInBits) + if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } - + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
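Aside: the new visitSRA fold above merges the outer arithmetic shift with the shift the truncate sits on, provided the truncate drops exactly the bits that inner shift brought in. Host-side sketch for a 64-bit value shifted right by 32 and truncated to 32 bits (assumes arithmetic right shift of signed values and two's-complement narrowing, both guaranteed since C++20):

#include <cassert>
#include <cstdint>

// (sra (trunc i32 (srl i64 x, 32)), c) == (trunc i32 (sra i64 x, 32 + c))
// because the truncate removes exactly the 32 bits the srl shifted in.
int32_t narrow_then_sra(uint64_t x, unsigned c) {
  return int32_t(uint32_t(x >> 32)) >> c;
}
int32_t sra_then_narrow(uint64_t x, unsigned c) {
  return int32_t(int64_t(x) >> (32 + c));
}

int main() {
  for (uint64_t x : {0x8000000012345678ull, 0x0123456789abcdefull})
    for (unsigned c : {0u, 1u, 7u, 31u})
      assert(narrow_then_sra(x, c) == sra_then_narrow(x, c));
  return 0;
}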
+ if (c1 + OpSizeInBits == InnerShiftSize) { + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, VT); + return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, + DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + N0.getOperand(0)->getOperand(0), + DAG.getConstant(c1 + c2, ShiftCountVT))); + } + } + // fold (srl (shl x, c), c) -> (and x, cst2) if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { @@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - + // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC = TruncC.trunc(TruncVT.getSizeInBits()); return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and %a, 2 // %c = setcc eq %b, 0 @@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } if (BothLiveOut) // Both unextended and extended values are live out. There had better be - // good a reason for the transformation. + // a good reason for the transformation. return ExtendNodes.size(); } return true; @@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()), NegOne, DAG.getConstant(0, VT)); - } + } // fold (sext x) -> (zext x) if the sign bit is known zero. 
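Aside: for the neighbouring "(srl (shl x, c), c) -> (and x, cst2)" fold, the mask is simply ~0 shifted right by the same amount — shifting left and then back right only clears the top c bits:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xfedcba9876543210ull;
  for (unsigned c : {1u, 8u, 33u})
    assert(((x << c) >> c) == (x & (~0ull >> c)));
  return 0;
}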
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); + if (NarrowLoad.getNode()) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorkList(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1)) && N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); // If the original shl may be shifting out bits, do not perform this // transformation. 
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - - N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); - if (ShAmt > KnownZeroBits) + unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - + InnerZExt.getOperand(0).getValueType().getSizeInBits(); + if (ShAmtVal > KnownZeroBits) return SDValue(); } - DebugLoc dl = N->getDebugLoc(); - return DAG.getNode(N0.getOpcode(), dl, VT, - DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), - DAG.getNode(ISD::ZERO_EXTEND, dl, - N0.getOperand(1).getValueType(), - N0.getOperand(1))); + + DebugLoc DL = N->getDebugLoc(); + + // Ensure that the shift amount is wide enough for the shifted value. + if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); } return SDValue(); @@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); } APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask.zext(VT.getSizeInBits()); + Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, DAG.getConstant(Mask, VT)); } @@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, - N->getDebugLoc(), - LN0->getChain(), LN0->getBasePtr(), - LN0->getSrcValue(), - LN0->getSrcValueOffset(), MemVT, + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), MemVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) - return SDValue(); } else if (Opc == ISD::SRL) { - // Annother special-case: SRL is basically zero-extending a narrower - // value. + // Another special-case: SRL is basically zero-extending a narrower value. ExtType = ISD::ZEXTLOAD; N0 = SDValue(N, 0); ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). 
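Aside: the rewritten guard above (ShAmtVal > KnownZeroBits) skips the transform whenever the inner shift could push bits out of the narrow type, because the wide form would have kept them. With an i8 value zero-extended to i16, shifted, then zero-extended to i32, there are 8 known-zero bits to shift into:

#include <cassert>
#include <cstdint>

// zext32(shl16(zext16(x), c)) vs. shl32(zext32(x), c) for an 8-bit x:
// the two agree only while c does not exceed the 8 zero bits the inner
// zero-extension introduced.
uint32_t narrow_shift(uint8_t x, unsigned c) { return uint32_t(uint16_t(uint16_t(x) << c)); }
uint32_t wide_shift(uint8_t x, unsigned c)   { return uint32_t(x) << c; }

int main() {
  uint8_t x = 0x81;
  assert(narrow_shift(x, 8) == wide_shift(x, 8));   // c <= 8: safe to reassociate
  assert(narrow_shift(x, 9) != wide_shift(x, 9));   // c > 8: the i16 shift loses a bit
  return 0;
}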
+ if (!ExtVT.isRound()) + return SDValue(); + unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? @@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } + + // At this point, we must have a load or else we can't do the transform. + if (!isa<LoadSDNode>(N0)) return SDValue(); + + // If the shift amount is larger than the input type then we're not + // accessing any of the loaded bytes. If the load was a zextload/extload + // then the result of the shift+trunc is zero/undef (handled elsewhere). + // If the load was a sextload then the result is a splat of the sign bit + // of the extended byte. This is not worth optimizing for. + if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + return SDValue(); } } - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() && - cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits && - // Do not change the width of a volatile load. - !cast<LoadSDNode>(N0)->isVolatile()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT PtrType = N0.getOperand(1).getValueType(); - - // For big endian targets, we need to adjust the offset to the pointer to - // load the correct bytes. - if (TLI.isBigEndian()) { - unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); - ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; - } - - uint64_t PtrOff = ShAmt / 8; - unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); - SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); - - SDValue Load = (ExtType == ISD::NON_EXTLOAD) - ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign); - - // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), - &DeadNodes); + // If the load is shifted left (and the result isn't shifted back right), + // we can fold the truncate through the shift. + unsigned ShLeftAmt = 0; + if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShLeftAmt = N01->getZExtValue(); + N0 = N0.getOperand(0); + } + } + + // If we haven't found a load, we can't narrow it. Don't transform one with + // multiple uses, this would require adding a new load. + if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || + // Don't change the width of a volatile load. + cast<LoadSDNode>(N0)->isVolatile()) + return SDValue(); + + // Verify that we are actually reducing a load width here. 
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT PtrType = N0.getOperand(1).getValueType(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (TLI.isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, PtrType)); + AddToWorkList(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + else + Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); + + // Replace the old load's chain with the new load's chain. + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), + &DeadNodes); - // Return the new loaded value. - return Load; + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } - return SDValue(); + // Return the new loaded value. + return Result; } SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { @@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getBasePtr(), LN0->getPointerInfo(), + EVT, LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); CombineTo(N, ExtLoad); @@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getPointerInfo().getAddrSpace() != + LD2->getPointerInfo().getAddrSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); @@ -4313,14 +4562,14 @@ 
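Aside: ReduceLoadWidth, as reworked above, replaces "load wide, shift right by ShAmt, keep the low bits" with one narrow load at byte offset ShAmt/8; on big-endian targets the shift amount is first mirrored via ShAmt = LVTStoreBits - EVTStoreBits - ShAmt. A host-side sketch of the little-endian case (the assert assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Bytes of the 32-bit value 0xcafef00d as a little-endian host stores them.
  unsigned char mem[4] = {0x0d, 0xf0, 0xfe, 0xca};

  // Wide form: 32-bit load, shift right by 16, keep the low 16 bits.
  uint32_t wide;
  std::memcpy(&wide, mem, 4);
  uint32_t via_shift = (wide >> 16) & 0xFFFFu;

  // Narrow form: a single 16-bit load at byte offset 16 / 8 = 2.
  uint16_t narrow;
  std::memcpy(&narrow, mem + 2, 2);

  assert(via_shift == narrow);   // both are 0xcafe on a little-endian host
  return 0;
}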
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), - LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, false, Align); + LD1->getBasePtr(), LD1->getPointerInfo(), + false, false, Align); } return SDValue(); } -SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { +SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); } // If the input is a constant, let getNode fold it. if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } // (conv (conv x, t1), t2) -> (conv x, t2) - if (N0.getOpcode() == ISD::BIT_CONVERT) - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { if (Align <= OrigAlign) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), - LN0->getBasePtr(), - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // This often reduces constant pool loads. 
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { - SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); @@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); @@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, Cst, DAG.getConstant(~SignBit, VT)); @@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector +/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. @@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, BV->getOperand(0))); - + SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { SDValue Op = BV->getOperand(i); @@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(), + Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } @@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // same sizes. 
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); - BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); + BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); - SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); + SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. - return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT); + return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } // Okay, we know the src/dst types are both integers of differing types. @@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (Op.getOpcode() == ISD::UNDEF) continue; EltIsUndef = false; - NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()). + NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). zextOrTrunc(SrcBitSize).zext(DstBitSize); } @@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { continue; } - APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))-> - getAPIntValue()).zextOrTrunc(SrcBitSize); + APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> + getAPIntValue().zextOrTrunc(SrcBitSize); for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = APInt(OpVal).trunc(DstBitSize); + APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); - if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal) + if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, Ops[0]); @@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), + LN0->getBasePtr(), LN0->getPointerInfo(), N0.getValueType(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->getAlignment()); @@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BIT_CONVERT && + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Int); } } @@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Int); } } @@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - SDNode *Trunc = 0; - if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { - // Look past truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || + ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && + (N1.getOperand(0).hasOneUse() && + N1.getOperand(0).getOpcode() == ISD::SRL))) { + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE) { + // Look pass the truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } - if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // // %a = ... @@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // brcond i32 %c ... // // into - // + // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 @@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + if (Trunc) + // Restore N1 if the above transformation doesn't match. + N1 = N->getOperand(1); } - + // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { @@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1; - - EVT SetCCVT = NodeToReplace.getValueType(); + EVT SetCCVT = N1.getValueType(); if (LegalTypes) SetCCVT = TLI.getSetCCResultType(SetCCVT); SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), @@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes); - removeFromWorkList(NodeToReplace.getNode()); - DAG.DeleteNode(NodeToReplace.getNode()); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), MVT::Other, Chain, SetCC, N2); } @@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - N->getDebugLoc(), - Chain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + LD->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. 
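Aside: the visitBITCAST/visitFNEG/visitFABS rewrites above all exploit the IEEE-754 layout — negation flips only the sign bit and fabs clears it — so both can be done with an integer xor/and against a sign-bit mask instead of loading a constant-pool value. Host sketch for 32-bit floats (assumes IEEE-754 single precision, as LLVM's f32 is):

#include <cassert>
#include <cstdint>
#include <cstring>

float fneg_via_int(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  bits ^= 0x80000000u;                    // xor with the sign bit
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

float fabs_via_int(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  bits &= ~0x80000000u;                   // and with ~sign bit
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  assert(fneg_via_int(1.5f) == -1.5f);
  assert(fabs_via_int(-2.25f) == 2.25f);
  return 0;
}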
if (LD->getExtensionType() == ISD::NON_EXTLOAD) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), + BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), - LD->getDebugLoc(), - BetterChain, Ptr, LD->getSrcValue(), - LD->getSrcValueOffset(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + LD->getValueType(0), + BetterChain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), @@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + // Make sure the new and old chains are cleaned up. AddToWorkList(Token.getNode()); - + // Replace uses with load result and token factor. Don't add users // to work list. return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair<unsigned, unsigned> Result(0, 0); - + // Check for the structure we're looking for. if (V->getOpcode() != ISD::AND || !isa<ConstantSDNode>(V->getOperand(1)) || !ISD::isNormalLoad(V->getOperand(0).getNode())) return Result; - + // Check the chain and pointer. LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. - + // The store should be chained directly to the load or be an operand of a // tokenfactor. if (LD == Chain.getNode()) @@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } if (!isOk) return Result; } - + // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && @@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. if (NotMaskLZ == 64) return Result; // All zero mask. - + // See if we have a continuous run of bits. If so, we have 0*1+0* if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) return Result; @@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. if (V.getValueType() != MVT::i64 && NotMaskLZ) NotMaskLZ -= 64-V.getValueSizeInBits(); - + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; switch (MaskedBytes) { - case 1: - case 2: + case 1: + case 2: case 4: break; default: return Result; // All one mask, or 5-byte mask. } - + // Verify that the first bit starts at a multiple of mask so that the access // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; - + Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; @@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, unsigned NumBytes = MaskInfo.first; unsigned ByteShift = MaskInfo.second; SelectionDAG &DAG = DC->getDAG(); - + // Check to see if IVal is all zeros in the part being masked in by the 'or' // that uses this. If not, this is not a replacement. 
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; - + // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization. MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) return 0; - + // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. if (ByteShift) @@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; - + SDValue Ptr = St->getBasePtr(); if (StOffset) { Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); NewAlign = MinAlign(NewAlign, StOffset); } - + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); - + ++OpsNarrowed; - return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, - St->getSrcValue(), St->getSrcValueOffset()+StOffset, + return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), false, false, NewAlign).getNode(); } @@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); unsigned Opc = Value.getOpcode(); - + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the @@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) return SDValue(NewST, 0); - + // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) @@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { Value.getOperand(0), ST,this)) return SDValue(NewST, 0); } - + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && Chain == SDValue(N0.getNode(), 1)) { LoadSDNode *LD = cast<LoadSDNode>(N0); - if (LD->getBasePtr() != Ptr) + if (LD->getBasePtr() != Ptr || + LD->getPointerInfo().getAddrSpace() != + ST->getPointerInfo().getAddrSpace()) return SDValue(); // Find the type to narrow it the load / op / store to. 
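Aside: CheckForMaskedLoad above only fires when the AND mask clears one contiguous, byte-aligned run of 1, 2 or 4 bytes; the pair it returns is that run's size and byte offset, which is what lets the following store be shrunk to just those bytes. A host-side sketch of the same classification for a 32-bit mask (illustrative only; uses the C++20 <bit> helpers):

#include <bit>
#include <cassert>
#include <cstdint>
#include <utility>

// Returns {bytes cleared, byte offset of the cleared run}, or {0, 0} if the
// mask does not clear a single byte-aligned run of 1, 2 or 4 bytes.
std::pair<unsigned, unsigned> classifyMask32(uint32_t mask) {
  uint64_t notMask = uint64_t(uint32_t(~mask));   // the bits the AND clears
  if (notMask == 0) return {0, 0};                // all-ones mask: nothing cleared
  unsigned tz = std::countr_zero(notMask);
  unsigned lz = std::countl_zero(notMask);
  if (tz & 7) return {0, 0};                      // run must start on a byte boundary
  if (std::countr_one(notMask >> tz) + tz + lz != 64)
    return {0, 0};                                // cleared bits must be contiguous
  unsigned bytes = (32 - (lz - 32) - tz) / 8;     // lz was counted against 64 bits
  if (bytes != 1 && bytes != 2 && bytes != 4) return {0, 0};
  if ((tz / 8) % bytes) return {0, 0};            // run must start at a multiple of its size
  return {bytes, tz / 8};
}

int main() {
  assert(classifyMask32(0xFFFF0000u) == std::make_pair(2u, 0u)); // low 2 bytes cleared
  assert(classifyMask32(0xFF00FFFFu) == std::make_pair(1u, 2u)); // byte 2 cleared
  assert(classifyMask32(0xF0FFFFFFu) == std::make_pair(0u, 0u)); // cleared run is only 4 bits
  return 0;
}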
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { DAG.getConstant(PtrOff, Ptr.getValueType())); SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, - LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); AddToWorkList(NewPtr.getNode()); @@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } +/// TransformFPLoadStorePair - For a given floating point load / store pair, +/// if the load value isn't used by any other operations, then consider +/// transforming the pair to integer load / store operations if the target +/// deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse() && + Chain == SDValue(Value.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + false, false, LDAlign); + + SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), + NewLD, ST->getBasePtr(), + ST->getPointerInfo(), + false, false, STAlign); + + AddToWorkList(NewLD.getNode()); + AddToWorkList(NewST.getNode()); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), + &DeadNodes); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. 
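Aside: TransformFPLoadStorePair, added above, turns a floating-point load whose only use is the immediately following store into the equivalent integer load/store pair when the target reports that as profitable. Bit for bit the two forms move the same bytes, as a host-side sketch shows:

#include <cassert>
#include <cstdint>
#include <cstring>

// Copying a double through a 64-bit integer moves exactly the same bytes as
// copying it through a floating-point value.
void copy_via_int(const double *src, double *dst) {
  uint64_t bits;
  std::memcpy(&bits, src, sizeof bits);   // i64 load
  std::memcpy(dst, &bits, sizeof bits);   // i64 store
}

int main() {
  double a = 3.141592653589793, b = 0.0;
  copy_via_int(&a, &b);
  assert(std::memcmp(&a, &b, sizeof a) == 0);
  return 0;
}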
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && + if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); @@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), OrigAlign); } @@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), MVT::i32); return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } break; @@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), MVT::i64); return DAG.getStore(Chain, N->getDebugLoc(), Tmp, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->isVolatile(), + Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { @@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), + Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); - SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, - Ptr, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); @@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align); } } + // Try transforming a pair floating point load / store ops to integer + // load / store ops. + SDValue NewST = TransformFPLoadStorePair(N); + if (NewST.getNode()) + return NewST; + if (CombinerAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Replace the chain to avoid dependency. 
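Aside: when a 64-bit integer store is not legal, the visitSTORE code above stores a constant f64 as its two 32-bit halves, low word first, with Lo and Hi swapped for big-endian targets and the pointer advanced by 4 for the second store. A little-endian host sketch of the split (the assert assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const double value = 1.0;                 // bit pattern 0x3FF0000000000000
  uint64_t bits;
  std::memcpy(&bits, &value, 8);

  unsigned char buf[8];
  uint32_t lo = uint32_t(bits);             // stored at offset 0
  uint32_t hi = uint32_t(bits >> 32);       // stored at offset 4
  std::memcpy(buf + 0, &lo, 4);
  std::memcpy(buf + 4, &hi, 4);

  double roundtrip;
  std::memcpy(&roundtrip, buf, 8);
  assert(roundtrip == value);               // holds on a little-endian host
  return 0;
}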
if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getSrcValue(),ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorkList(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, - APInt::getLowBitsSet( - Value.getValueType().getScalarType().getSizeInBits(), - ST->getMemoryVT().getScalarType().getSizeInBits()))) + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits()))) return SDValue(N, 0); } @@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), ST->getMemoryVT())) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), - Ptr, ST->getSrcValue(), - ST->getSrcValueOffset(), ST->getMemoryVT(), + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVal.getOpcode() == ISD::UNDEF) return InVec; + EVT VT = InVec.getValueType(); + + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new // vector with the inserted element. if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { @@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (Elt < Ops.size()) Ops[Elt] = InVal; return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - InVec.getValueType(), &Ops[0], Ops.size()); + VT, &Ops[0], Ops.size()); } - // If the invec is an UNDEF and if EltNo is a constant, create a new + // If the invec is an UNDEF and if EltNo is a constant, create a new // BUILD_VECTOR with undef elements and the inserted element. 
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && + if (InVec.getOpcode() == ISD::UNDEF && isa<ConstantSDNode>(EltNo)) { - EVT VT = InVec.getValueType(); EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT)); @@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (Elt < Ops.size()) Ops[Elt] = InVal; return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - InVec.getValueType(), &Ops[0], Ops.size()); + VT, &Ops[0], Ops.size()); } return SDValue(); } @@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(1); if (isa<ConstantSDNode>(EltNo)) { - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; - if (InVec.getOpcode() == ISD::BIT_CONVERT) { + if (InVec.getOpcode() == ISD::BITCAST) { EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Select the input vector, guarding against out of range extract vector. unsigned NumElems = VT.getVectorNumElements(); - int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt); + int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BIT_CONVERT) + if (InVec.getOpcode() == ISD::BITCAST) InVec = InVec.getOperand(0); if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); @@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile()) return SDValue(); + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LN0->getBasePtr().getValueType()); + unsigned Align = LN0->getAlignment(); if (NewLoad) { // Check the resultant load doesn't need a higher alignment than the // original load. 
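[Editorial sketch, not part of the commit.] The INSERT_VECTOR_ELT combine above rewrites a BUILD_VECTOR (or all-undef) operand list with the inserted lane replaced, provided a legal BUILD_VECTOR can be produced. The operand-list rewrite reduced to a tiny hypothetical helper:

#include <vector>

// Conceptual sketch: fold insert_vector_elt into an existing operand list.
// 'ops' stands in for the BUILD_VECTOR operands (or all-undef lanes for UNDEF).
template <typename Value>
std::vector<Value> foldInsertIntoBuildVector(std::vector<Value> ops,
                                             unsigned elt, Value inVal) {
  if (elt < ops.size())
    ops[elt] = inVal;   // overwrite only the inserted lane
  return ops;           // operands of the replacement BUILD_VECTOR
}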
unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); + TLI.getTargetData() + ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) return SDValue(); @@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue NewPtr = LN0->getBasePtr(); + unsigned PtrOff = 0; + if (Elt) { - unsigned PtrOff = LVT.getSizeInBits() * Elt / 8; + PtrOff = LVT.getSizeInBits() * Elt / 8; EVT PtrType = NewPtr.getValueType(); if (TLI.isBigEndian()) PtrOff = VT.getSizeInBits() / 8 - PtrOff; @@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getSrcValue(), LN0->getSrcValueOffset(), + LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), Align); } @@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (ExtIndex > VT.getVectorNumElements()) return SDValue(); - + Mask.push_back(ExtIndex); continue; } @@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // FIXME: implement canonicalizations from DAG.getVectorShuffle() - // If it is a splat, check if the argument vector is a build_vector with - // all scalar elements the same. - if (cast<ShuffleVectorSDNode>(N)->isSplat()) { + // If it is a splat, check if the argument vector is another splat or a + // build_vector with all scalar elements the same. + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. 
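[Editorial sketch, not part of the commit.] When the EXTRACT_VECTOR_ELT hunk above scalarizes the extract into a narrow load, it computes a byte offset from the lane index and mirrors it on big-endian targets. That offset arithmetic in isolation (hypothetical helper, mirroring the PtrOff computation shown above):

// Conceptual sketch of the PtrOff computation used when turning
// extract_vector_elt of a loaded vector into a narrow scalar load.
unsigned laneByteOffset(unsigned eltSizeInBits, unsigned vecSizeInBits,
                        unsigned elt, bool bigEndian) {
  unsigned off = eltSizeInBits * elt / 8;   // little-endian lane offset
  if (bigEndian)
    off = vecSizeInBits / 8 - off;          // mirrored offset on big-endian targets
  return off;
}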
- if (V->getOpcode() == ISD::BIT_CONVERT) { + if (V->getOpcode() == ISD::BITCAST) { SDValue ConvInput = V->getOperand(0); if (ConvInput.getValueType().isVector() && ConvInput.getValueType().getVectorNumElements() == NumElts) @@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } if (V->getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElems = V->getNumOperands(); - unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex(); - if (NumElems > BaseIdx) { - SDValue Base; - bool AllSame = true; - for (unsigned i = 0; i != NumElems; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF) { - Base = V->getOperand(i); - break; - } + assert(V->getNumOperands() == NumElts && + "BUILD_VECTOR has wrong number of operands"); + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; } - // Splat of <u, u, u, u>, return <u, u, u, u> - if (!Base.getNode()) - return N0; - for (unsigned i = 0; i != NumElems; ++i) { - if (V->getOperand(i) != Base) { - AllSame = false; - break; - } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; } - // Splat of <x, x, x, x>, return <x, x, x, x> - if (AllSame) - return N0; } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; } } return SDValue(); @@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (N->getOpcode() == ISD::AND) { - if (RHS.getOpcode() == ISD::BIT_CONVERT) + if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); if (RHS.getOpcode() == ISD::BUILD_VECTOR) { SmallVector<int, 8> Indices; @@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); - LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); + LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); } } @@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - EVT VT = N->getValueType(0); - assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); + assert(N->getValueType(0).isVector() && + "SimplifyVBinOp only works on vectors!"); - EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { break; } - // If the vector element type is not legal, the BUILD_VECTOR operands - // are promoted and implicitly truncated. Make that explicit here. 
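[Editorial sketch, not part of the commit.] The visitVECTOR_SHUFFLE hunk above keeps the rule that a splat of a BUILD_VECTOR whose lanes all match the first defined lane can be replaced by the input vector. A standalone version of that scan, with std::optional standing in for possibly-undef SDValue lanes and int as a placeholder element type:

#include <optional>
#include <vector>

// Conceptual sketch: does this BUILD_VECTOR operand list splat a single value?
// An empty optional models an UNDEF lane.
bool buildVectorIsUniform(const std::vector<std::optional<int>> &lanes) {
  const std::optional<int> *base = nullptr;
  for (const auto &lane : lanes)
    if (lane) { base = &lane; break; }    // first defined lane becomes Base
  if (!base)
    return true;                          // splat of <u, u, u, u>
  for (const auto &lane : lanes)
    if (lane != *base)                    // every lane must match Base
      return false;
  return true;                            // splat of <x, x, x, x>
}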
- if (LHSOp.getValueType() != EltType) - LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp); - if (RHSOp.getValueType() != EltType) - RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp); - - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType, + EVT VT = LHSOp.getValueType(); + assert(RHSOp.getValueType() == VT && + "SimplifyVBinOp with different BUILD_VECTOR element types"); + SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { AddToWorkList(FoldOp.getNode()); } - if (Ops.size() == LHS.getNumOperands()) { - EVT VT = LHS.getValueType(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, - &Ops[0], Ops.size()); - } + if (Ops.size() == LHS.getNumOperands()) + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + LHS.getValueType(), &Ops[0], Ops.size()); } return SDValue(); @@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { + // Cannot simplify select with vector condition + if (TheSelect->getOperand(0).getValueType().isVector()) return false; + // If this is a select from two identical things, try to pull the operation // through the select. - if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){ - // If this is a load and the token chain is identical, replace the select - // of two loads with a load through a select of the address to load from. - // This triggers in things like "select bool X, 10.0, 123.0" after the FP - // constants have been dropped into the constant pool. - if (LHS.getOpcode() == ISD::LOAD && + if (LHS.getOpcode() != RHS.getOpcode() || + !LHS.hasOneUse() || !RHS.hasOneUse()) + return false; + + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // Token chains must be identical. + if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - !cast<LoadSDNode>(LHS)->isVolatile() && - !cast<LoadSDNode>(RHS)->isVolatile() && - // Token chains must be identical. - LHS.getOperand(0) == RHS.getOperand(0)) { - LoadSDNode *LLD = cast<LoadSDNode>(LHS); - LoadSDNode *RLD = cast<LoadSDNode>(RHS); - - // If this is an EXTLOAD, the VT's must match. - if (LLD->getMemoryVT() == RLD->getMemoryVT()) { + LLD->isVolatile() || RLD->isVolatile() || + // If this is an EXTLOAD, the VT's must match. + LLD->getMemoryVT() != RLD->getMemoryVT() || + // If this is an EXTLOAD, the kind of extension must match. + (LLD->getExtensionType() != RLD->getExtensionType() && + // The only exception is if one of the extensions is anyext. + LLD->getExtensionType() != ISD::EXTLOAD && + RLD->getExtensionType() != ISD::EXTLOAD) || // FIXME: this discards src value information. This is // over-conservative. It would be beneficial to be able to remember // both potential memory locations. 
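[Editorial sketch, not part of the commit.] SimplifyVBinOp above folds a vector binop of two BUILD_VECTORs by applying the scalar operation lane by lane, giving up unless every lane folds to UNDEF or a constant. The lane-wise idea as a standalone sketch, using integer addition as the stand-in operation:

#include <cstddef>
#include <cstdint>
#include <vector>

// Conceptual sketch: fold a vector binop of two constant BUILD_VECTORs lane by
// lane; the folded lanes become the operands of a new BUILD_VECTOR.
std::vector<int64_t> foldVectorAddLanes(const std::vector<int64_t> &lhs,
                                        const std::vector<int64_t> &rhs) {
  std::vector<int64_t> folded;
  for (std::size_t i = 0; i < lhs.size() && i < rhs.size(); ++i)
    folded.push_back(lhs[i] + rhs[i]);   // each lane folds to a constant
  return folded;
}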
Since we are discarding // src value info, don't do the transformation if the memory // locations are not in the default address space. - unsigned LLDAddrSpace = 0, RLDAddrSpace = 0; - if (const Value *LLDVal = LLD->getMemOperand()->getValue()) { - if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType())) - LLDAddrSpace = PT->getAddressSpace(); - } - if (const Value *RLDVal = RLD->getMemOperand()->getValue()) { - if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType())) - RLDAddrSpace = PT->getAddressSpace(); - } - SDValue Addr; - if (LLDAddrSpace == 0 && RLDAddrSpace == 0) { - if (TheSelect->getOpcode() == ISD::SELECT) { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) && - (!RLD->hasAnyUseOfValue(1) || - !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) { - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); - } - } else { - // Check that the condition doesn't reach either load. If so, folding - // this will induce a cycle into the DAG. - if ((!LLD->hasAnyUseOfValue(1) || - (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) && - (!RLD->hasAnyUseOfValue(1) || - (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) && - !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) { - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), - TheSelect->getOperand(1), - LLD->getBasePtr(), RLD->getBasePtr(), - TheSelect->getOperand(4)); - } - } - } - - if (Addr.getNode()) { - SDValue Load; - if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { - Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - LLD->getChain(), - Addr, 0, 0, - LLD->isVolatile(), - LLD->isNonTemporal(), - LLD->getAlignment()); - } else { - Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - LLD->getChain(), Addr, 0, 0, - LLD->getMemoryVT(), - LLD->isVolatile(), - LLD->isNonTemporal(), - LLD->getAlignment()); - } + LLD->getPointerInfo().getAddrSpace() != 0 || + RLD->getPointerInfo().getAddrSpace() != 0) + return false; - // Users of the select now use the result of the load. - CombineTo(TheSelect, Load); + // Check that the select condition doesn't reach either load. If so, + // folding this will induce a cycle into the DAG. If not, this is safe to + // xform, so create a select of the addresses. 
+ SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + SDNode *CondNode = TheSelect->getOperand(0).getNode(); + if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || + (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) + return false; + Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } else { // Otherwise SELECT_CC + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); + SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + + if ((LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || + (LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + return false; - // Users of the old loads now use the new load's chain. We know the - // old-load value is dead now. - CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); - CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); - return true; - } - } - } + Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + + SDValue Load; + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + TheSelect->getDebugLoc(), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->isVolatile(), LLD->isNonTemporal(), + LLD->getAlignment()); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? + RLD->getExtensionType() : LLD->getExtensionType(), + TheSelect->getDebugLoc(), + TheSelect->getValueType(0), + // FIXME: Discards pointer info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; } return false; @@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. if (N2 == N3) return N2; - + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); @@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::FABS, DL, VT, N3); } } - + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 // in it. This is a win when the constant is not otherwise available because @@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, }; const Type *FPTy = Elts[0]->getType(); const TargetData &TD = *TLI.getTargetData(); - + // Create a ConstantArray of the two constants. 
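[Editorial sketch, not part of the commit.] The SimplifySelectOps rewrite above folds a select of two compatible loads into a single load whose address is a select of the two base pointers, once it has checked that the condition does not depend on either load. The transformation in plain scalar terms (a hypothetical analogue, not the DAG code):

// Conceptual sketch: select(cond, load p, load q) -> load(select(cond, p, q)).
int loadOfSelectedAddress(bool cond, const int *p, const int *q) {
  const int *addr = cond ? p : q;   // the new SELECT of the two base pointers
  return *addr;                     // one load replaces both original loads
}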
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), @@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Zero = DAG.getIntPtrConstant(0); unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); SDValue One = DAG.getIntPtrConstant(EltSize); - + SDValue Cond = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), N0, N1, CC); @@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } - } + } // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) @@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } + // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) + // where y is has a single bit set. + // A plaintext description would be, we can turn the SELECT_CC into an AND + // when the condition can be materialized as an all-ones register. Any + // single bit-test can be materialized as an all-ones register with + // shift-left and shift-right-arith. + if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && + N0->getValueType(0) == VT && + N1C && N1C->isNullValue() && + N2C && N2C->isNullValue()) { + SDValue AndLHS = N0->getOperand(0); + ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { + // Shift the tested bit over the sign bit. + APInt AndMask = ConstAndRHS->getAPIntValue(); + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); + SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is either + // all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } + } + // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) { @@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { } /// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself. Provides base object and offset as results. +// to alias with anything but itself. Provides base object and offset as +// results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, void *&CV) { // Assume it is a primitive operation. @@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, Offset += C->getZExtValue(); } } - + // Return the underlying GlobalValue, and update the Offset. Return false // for GlobalAddressSDNode since the same GlobalAddress may be represented // by multiple nodes with different offsets. 
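[Editorial sketch, not part of the commit.] The new fold added above materializes a single-bit test as an all-ones or all-zero register: shift the tested bit into the sign position, arithmetic-shift it all the way back down, then AND with the selected value. A self-contained 32-bit sketch (helper name hypothetical; the narrowing conversions are benign on the usual two's-complement targets):

#include <cstdint>

// Conceptual sketch of (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A),
// where 'singleBit' is assumed to have exactly one bit set.
uint32_t selectViaAllOnesMask(uint32_t x, uint32_t singleBit, uint32_t a) {
  unsigned shl = 0;
  while (((singleBit << shl) & 0x80000000u) == 0)
    ++shl;                                            // countLeadingZeros(singleBit)
  int32_t shifted = static_cast<int32_t>(x << shl);   // tested bit now in the sign bit
  int32_t mask = shifted >> 31;                       // arithmetic shift: all-ones or zero
  return static_cast<uint32_t>(mask) & a;             // (x & singleBit) ? a : 0
}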
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, SDValue Ptr2, int64_t Size2, const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2) const { + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const { // If they are the same then they must be aliases. if (Ptr1 == Ptr2) return true; @@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); - // If we know what the bases are, and they aren't identical, then we know they - // cannot alias. + // It is possible for different frame indices to alias each other, mostly + // when tail call optimization reuses return address slots for arguments. + // To catch this case, look up the actual index of frame indices to compute + // the real alias relationship. + if (isFrameIndex1 && isFrameIndex2) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + } + + // Otherwise, if we know what the bases are, and they aren't identical, then + // we know they cannot alias. if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) return false; @@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, (Size1 == Size2) && (SrcValueAlign1 > Size1)) { int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; - + // There is no overlap between these relatively aligned accesses of similar // size, return no alias. if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) return false; } - + if (CombinerGlobalAA) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); + AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), + AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// node. Returns true if the operand was a load. 
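[Editorial sketch, not part of the commit.] Once isAlias above knows that two accesses share a base object (including two frame indices after their real frame offsets are added in), aliasing reduces to a simple interval-overlap test. That test in isolation (hypothetical helper):

#include <cstdint>

// Conceptual sketch of the overlap check used by isAlias when both accesses
// are known to share the same underlying object.
bool accessesOverlap(int64_t off1, int64_t size1, int64_t off2, int64_t size2) {
  return !((off1 + size1) <= off2 || (off2 + size2) <= off1);
}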
bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, + const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlign) const { + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); Size = LD->getMemoryVT().getSizeInBits() >> 3; SrcValue = LD->getSrcValue(); SrcValueOffset = LD->getSrcValueOffset(); SrcValueAlign = LD->getOriginalAlignment(); + TBAAInfo = LD->getTBAAInfo(); return true; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); @@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N, SrcValue = ST->getSrcValue(); SrcValueOffset = ST->getSrcValueOffset(); SrcValueAlign = ST->getOriginalAlignment(); + TBAAInfo = ST->getTBAAInfo(); } else { llvm_unreachable("FindAliasInfo expected a memory operand"); } @@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, const Value *SrcValue; int SrcValueOffset; unsigned SrcValueAlign; - bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, - SrcValueAlign); + const MDNode *SrcTBAAInfo; + bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, + SrcValueAlign, SrcTBAAInfo); // Starting off. Chains.push_back(OriginalChain); unsigned Depth = 0; - + // Look at each chain and determine if it is an alias. If so, add it to the // aliases list. If not, then continue up the chain looking for the next // candidate. while (!Chains.empty()) { SDValue Chain = Chains.back(); Chains.pop_back(); - - // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we find two aliases. If we've seen two aliases, assume we'll // find more and revert to original chain since the xform is unlikely to be // profitable. - // - // FIXME: The depth check could be made to return the last non-aliasing + // + // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. if (Depth > 6 || Aliases.size() == 2) { @@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, const Value *OpSrcValue; int OpSrcValueOffset; unsigned OpSrcValueAlign; + const MDNode *OpSrcTBAAInfo; bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign); + OpSrcValueAlign, + OpSrcTBAAInfo); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, + SrcTBAAInfo, OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign)) { + OpSrcValueAlign, OpSrcTBAAInfo)) { Aliases.push_back(Chain); } else { // Look further up the chain. @@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. 
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, &Aliases[0], Aliases.size()); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index a4eed71e65c0..490b857b0e9c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" using namespace llvm; /// startNewBlock - Set the current block to which generated machine @@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } - + // If target-independent code couldn't handle the value, give target-specific // code a try. if (!Reg && isa<Constant>(V)) Reg = TargetMaterializeConstant(cast<Constant>(V)); - + // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. if (Reg != 0) { @@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { LocalValueMap[I] = Reg; return Reg; } - + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) // Use the new register. @@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) @@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { NIsKill = true; continue; } - + // N = N + Idx * ElementSize; uint64_t ElementSize = TD.getTypeAllocSize(Ty); std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); @@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) { return true; const Value *Address = DI->getAddress(); - if (!Address) + if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) return true; - if (isa<UndefValue>(Address)) - return true; - const AllocaInst *AI = dyn_cast<AllocaInst>(Address); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) - // Building the map above is target independent. Generating DBG_VALUE - // inline is target dependent; do this now. - (void)TargetSelectInstruction(cast<Instruction>(I)); + + unsigned Reg = 0; + unsigned Offset = 0; + if (const Argument *Arg = dyn_cast<Argument>(Address)) { + if (Arg->hasByValAttr()) { + // Byval arguments' frame index is recorded during argument lowering. + // Use this info directly. + Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + if (Offset) + Reg = TRI.getFrameRegister(*FuncInfo.MF); + } + } + if (!Reg) + Reg = getRegForValue(Address); + + if (Reg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(Reg, RegState::Debug).addImm(Offset) + .addMetadata(DI->getVariable()); return true; } case Intrinsic::dbg_value: { @@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) { } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - // Insert an undef so we can see what we dropped. 
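[Editorial sketch, not part of the commit.] In the FastISel::SelectGetElementPtr code above, a constant GEP index is folded straight into the running address as alloc-size times index, emitted as one ADD-with-immediate. The arithmetic in isolation (hypothetical helper, not the FastISel API):

#include <cstdint>

// Conceptual sketch: fold one constant GEP index into the address computation.
uint64_t addConstantGepIndex(uint64_t addr, uint64_t typeAllocSize, int64_t idx) {
  uint64_t offs = typeAllocSize * static_cast<uint64_t>(idx);  // Offs above
  return addr + offs;                                          // N = N + Offs
}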
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) - .addReg(0U).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } + DEBUG(dbgs() << "Dropping debug info for " << DI); + } return true; } case Intrinsic::eh_exception: { @@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) { bool FastISel::SelectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; - + // Check if the destination type is legal. Or as a special case, // it may be i1 if we're doing a truncate because that's // easy and somewhat common. @@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { InputReg, InputRegIsKill); if (!ResultReg) return false; - + UpdateValueMap(I, ResultReg); return true; } @@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) { return true; } - // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. + // Bitcasts of other values become reg-reg copies or BITCAST operators. EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) // Unhandled type. Halt "fast" selection and bail. return false; - + unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); - + // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { @@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) { ResultReg).addReg(Op0); } } - - // If the reg-reg copy failed, select a BIT_CONVERT opcode. + + // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BIT_CONVERT, Op0, Op0IsKill); - + ISD::BITCAST, Op0, Op0IsKill); + if (!ResultReg) return false; - + UpdateValueMap(I, ResultReg); return true; } @@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) { return false; unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BIT_CONVERT, OpReg, OpRegIsKill); + ISD::BITCAST, OpReg, OpRegIsKill); if (IntReg == 0) return false; @@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) { return false; ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); + ISD::BITCAST, IntResultReg, /*Kill=*/true); if (ResultReg == 0) return false; @@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { // Dynamic-sized alloca is not handled yet. 
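[Editorial sketch, not part of the commit.] SelectFNeg above lowers an FP negation by bit-converting to the same-width integer type and back (presumably with the usual sign-bit XOR in between; that step sits outside the changed lines). A standalone sketch of the pattern for f32, assuming IEEE-754 layout:

#include <cstdint>
#include <cstring>

// Conceptual sketch: fneg as BITCAST -> XOR of the sign bit -> BITCAST.
float fnegViaIntegerXor(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);  // the BITCAST to i32
  bits ^= 0x80000000u;                  // flip only the sign bit
  std::memcpy(&f, &bits, sizeof f);     // BITCAST back to f32
  return f;
}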
return false; - + case Instruction::Call: return SelectCall(I); - + case Instruction::BitCast: return SelectBitCast(I); @@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT, return 0; } -unsigned FastISel::FastEmit_rr(MVT, MVT, +unsigned FastISel::FastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, bool /*Op0IsKill*/, unsigned /*Op1*/, bool /*Op1IsKill*/) { @@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5ef6404ee5d6..98582ba99f14 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 61c2a90e7edc..e309defba20f 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -31,11 +31,11 @@ using namespace llvm; /// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do +/// operands first, then an optional chain, and optional glue operands (which do /// not go into the resulting MachineInstr). unsigned InstrEmitter::CountResults(SDNode *Node) { unsigned N = Node->getNumValues(); - while (N && Node->getValueType(N - 1) == MVT::Flag) + while (N && Node->getValueType(N - 1) == MVT::Glue) --N; if (N && Node->getValueType(N - 1) == MVT::Other) --N; // Skip over chain result. @@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) { } /// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by an optional chain operand, then an optional flag operand. +/// followed by an optional chain operand, then an optional glue operand. /// Compute the number of actual operands that will go into the resulting /// MachineInstr. unsigned InstrEmitter::CountOperands(SDNode *Node) { unsigned N = Node->getNumOperands(); - while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) --N; if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) --N; // Ignore chain if it exists. @@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); return; } @@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; EVT VT = Node->getValueType(Op.getResNo()); - if (VT == MVT::Other || VT == MVT::Flag) + if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; if (User->isMachineOpcode()) { @@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, if (IsClone) VRBaseMap.erase(Op); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } } @@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, DenseMap<SDValue, unsigned> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); @@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, BA->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && - Op.getValueType() != MVT::Flag && - "Chain and flag operands should occur at end of operand list!"); + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } @@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(VReg); - const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); - assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); - - // Figure out the register class to create for the destreg. - // Note that if we're going to directly use an existing register, - // it must be precisely the required class, and not a subclass - // thereof. 
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { - // Create the reg - assert(SRC && "Couldn't find source register class"); - VRBase = MRI->createVirtualRegister(SRC); - } + MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned SrcReg, DstReg, DefSubIdx; + if (DefMI && + TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && + SubIdx == DefSubIdx) { + // Optimize these: + // r1025 = s/zext r1024, 4 + // r1026 = extract_subreg r1025, 4 + // to a copy + // r1026 = copy r1024 + const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg); + VRBase = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + } else { + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::COPY), VRBase); + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); - // Add source, and subreg index - AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, - IsClone, IsCloned); - assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && - "Cannot yet extract from physregs"); - MI->getOperand(1).setSubReg(SubIdx); - MBB->insert(InsertPos, MI); + // Add source, and subreg index + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&& + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); + MBB->insert(InsertPos, MI); + } } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { SDValue N0 = Node->getOperand(0); @@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. 
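[Editorial sketch, not part of the commit.] The EmitSubregNode change above recognizes that extracting the original-width subregister of a value that was just sign- or zero-extended is simply a copy of the pre-extension register. In scalar terms (hypothetical function; the widths are chosen only for illustration):

#include <cstdint>

// Conceptual sketch of the peephole documented above:
//   r1025 = zext r1024 ; r1026 = extract_subreg r1025, <low half>
// collapses to
//   r1026 = copy r1024
uint32_t extractLowOfZext(uint32_t r1024) {
  uint64_t r1025 = static_cast<uint64_t>(r1024);  // the zero-extension
  uint32_t r1026 = static_cast<uint32_t>(r1025);  // extract_subreg of the low half
  return r1026;                                   // identical to a plain copy of r1024
}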
assert(isNew && "Node emitted out of order - early"); } @@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (!SRC) - llvm_unreachable("Invalid subregister index in REG_SEQUENCE"); - if (SRC != RC) { + if (SRC && SRC != RC) { MRI->setRegClass(NewVReg, SRC); RC = SRC; } @@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MBB->insert(InsertPos, MI); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; - isNew = isNew; // Silence compiler warning. + (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); } @@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // The MachineInstr constructor adds implicit-def operands. Scan through // these to determine which are dead. if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { + Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { // First, collect all used registers. SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) if (F->getOpcode() == ISD::CopyFromReg) UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); else { @@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { unsigned Reg = R->getReg(); - if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) UsedRegs.push_back(Reg); } } @@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); - if (II.usesCustomInsertionHook()) { - // Insert this instruction into the basic block using a target - // specific inserter which may returns a new basic block. - bool AtEnd = InsertPos == MBB->end(); - MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); - if (NewMBB != MBB) { - if (AtEnd) - InsertPos = NewMBB->end(); - MBB = NewMBB; - } - return; - } - - // Additional results must be an physical register def. + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; @@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); // If there are no uses, mark the register as dead now, so that // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Flag value, for the benefit of targets still using - // Flag for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + // node has a Glue value, for the benefit of targets still using + // Glue for values in physregs. + else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) MI->addRegisterDead(Reg, TRI); } } // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any flag outputs, we don't do this - // because we don't know what implicit defs are being used by flagged nodes. 
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag) + // implicit def as dead. If the node has any glue outputs, we don't do this + // because we don't know what implicit defs are being used by glued nodes. + if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) if (const unsigned *IDList = II.getImplicitDefs()) { for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); i != e; ++i) @@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::INLINEASM: { unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), @@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); - // Add the isAlignStack bit. - int64_t isAlignStack = - cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + // Add the HasSideEffect and isAlignStack bits. + int64_t ExtraInfo = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); - MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2981cd3f1cab..49c862ce3e0b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,11 +66,6 @@ class SelectionDAGLegalize { /// against each other, including inserted libcalls. SDValue LastCALLSEQ_END; - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; - enum LegalizeAction { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. @@ -91,6 +87,9 @@ class SelectionDAGLegalize { // If someone requests legalization of the new node, return itself. if (From != To) LegalizedNodes.insert(std::make_pair(To, To)); + + // Transfer SDDbgValues. 
+ DAG.TransferDbgValues(From, To); } public: @@ -172,6 +171,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, void SelectionDAGLegalize::LegalizeDAG() { LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node) { - if (Node->getOpcode() == ISD::CALLSEQ_END) - return Node; +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User)) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) return Result; } return 0; @@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) { /// FindCallStartFromCallEnd - Given a chained node that is part of a call /// sequence, find the CALLSEQ_START node that initiates the call sequence. 
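[Editorial sketch, not part of the commit.] FindCallEndFromCallStart above now counts nested CALLSEQ_START/CALLSEQ_END pairs so that it returns the end node matching the outermost start. The matching logic, reduced to a flat scan over a hypothetical encoding ('S' for CALLSEQ_START, 'E' for CALLSEQ_END, anything else ignored):

#include <cstddef>
#include <string>

// Conceptual sketch: find the CALLSEQ_END that closes the CALLSEQ_START at
// position 'start', honouring nesting.
int findMatchingCallSeqEnd(const std::string &seq, std::size_t start) {
  int depth = 0;
  for (std::size_t i = start; i < seq.size(); ++i) {
    if (seq[i] == 'S')
      ++depth;                       // entering a (possibly nested) call sequence
    else if (seq[i] == 'E' && --depth == 0)
      return static_cast<int>(i);    // this end closes the original start
  }
  return -1;                         // no matching CALLSEQ_END on this chain
}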
static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + int nested = 0; assert(Node && "Didn't find callseq_start for a call??"); - if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; - - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - return FindCallStartFromCallEnd(Node->getOperand(0).getNode()); + while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { + Node = Node->getOperand(0).getNode(); + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + switch (Node->getOpcode()) { + default: + break; + case ISD::CALLSEQ_START: + if (!nested) + return Node; + nested--; + break; + case ISD::CALLSEQ_END: + nested++; + break; + } + } + return 0; } /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to @@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), - CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, false, Alignment); + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, false, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); - int SVOffset = ST->getSrcValueOffset(); DebugLoc dl = ST->getDebugLoc(); if (ST->getMemoryVT().isFloatingPoint() || ST->getMemoryVT().isVector()) { @@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Expand to a bitconvert of the value to the integer type of the // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), ST->isNonTemporal(), - Alignment); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT, - false, false, 0); + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. 
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo().getWithOffset(Offset), ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. @@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, - NULL, 0, MemVT, false, false, 0); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getSrcValue(), SVOffset + Offset, + ST->getPointerInfo() + .getWithOffset(Offset), MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); @@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Store the two parts SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, - ST->getSrcValue(), SVOffset, NewStoredVT, + ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, - ST->getSrcValue(), SVOffset + IncrementSize, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); @@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, static SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI) { - int SVOffset = LD->getSrcValueOffset(); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isTypeLegal(intVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, LD->isVolatile(), + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, dl); - } else { - // Copy the value to a (aligned) stack slot using (unaligned) integer - // loads and stores, then do a (aligned) load from the stack slot. - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. 
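[Editorial sketch, not part of the commit.] The tail of ExpandUnalignedStore above splits a misaligned store into two stores of half width, the second at IncrementSize bytes, writing the high half first on big-endian targets. A plain-C++ sketch of the split for a 32-bit value (memcpy stands in for the two narrower stores; names hypothetical):

#include <cstdint>
#include <cstring>

// Conceptual sketch: expand a misaligned 32-bit store into two 16-bit stores.
void expandUnalignedStore32(uint8_t *ptr, uint32_t value, bool littleEndian) {
  uint16_t lo = static_cast<uint16_t>(value);        // low half of the value
  uint16_t hi = static_cast<uint16_t>(value >> 16);  // high half of the value
  uint16_t first  = littleEndian ? lo : hi;          // half written at the pointer
  uint16_t second = littleEndian ? hi : lo;          // half written at +IncrementSize
  std::memcpy(ptr, &first, 2);                       // Store1
  std::memcpy(ptr + 2, &second, 2);                  // Store2, MinAlign(align, 2)
}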
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - - SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); - SmallVector<SDValue, 8> Stores; - SDValue StackPtr = StackBase; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the original location. - SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + Offset, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); - // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, false, false, 0)); - // Increment the pointers. - Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - } + } - // The last copy may be partial. Do an extending load. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, - LD->getSrcValue(), SVOffset + Offset, - MemVT, LD->isVolatile(), - LD->isNonTemporal(), - MinAlign(LD->getAlignment(), Offset)); + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. - // On big-endian machines this requires a truncating store to ensure - // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT, false, false, 0)); - - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); - - // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, - NULL, 0, LoadedVT, false, false, 0); - - // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); } + + // The last copy may be partial. Do an extending load. 
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } @@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); // Truncate or zero extend offset to target pointer type. @@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. 
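When a same-width integer load is not legal, the ExpandUnalignedLoad path above mirrors the store side: copy the misaligned value into an aligned stack temporary one register's worth at a time, with a smaller extending load for the tail, then reload it once, aligned, with the original type. A loose C++ analogue of that copy loop, using memcpy for the per-piece accesses; the f64/32-bit-register sizes are just an example:

#include <cstring>

// Emulate an aligned load of a wide type from a possibly misaligned source:
// copy register-width pieces into an aligned buffer, finish with a smaller
// copy for the tail, then perform the real load from the aligned buffer.
double load_f64_unaligned(const unsigned char *src) {
  constexpr unsigned RegBytes = 4;  // pretend integer registers are 32-bit
  alignas(double) unsigned char slot[sizeof(double)];
  unsigned Offset = 0;
  while (Offset + RegBytes < sizeof(double)) {   // all but the last piece
    std::memcpy(slot + Offset, src + Offset, RegBytes);
    Offset += RegBytes;
  }
  std::memcpy(slot + Offset, src + Offset, sizeof(double) - Offset);  // last, possibly partial
  double result;
  std::memcpy(&result, slot, sizeof(double));    // the final, aligned load
  return result;
}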
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, - false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Tmp1 = ST->getChain(); SDValue Tmp2 = ST->getBasePtr(); SDValue Tmp3; - int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Tmp3 = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), MVT::i32); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (CFP->getValueType(0) == MVT::f64) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); - return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); - } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. If the target supports neither 32- nor 64-bits, this // xform is certainly not worth it. const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, Alignment); + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); @@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode - TargetLowering::LegalizeAction Action; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; bool SimpleFinishLegalizing = true; switch (Node->getOpcode()) { case ISD::INTRINSIC_W_CHAIN: @@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
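The OptimizeFloatStore hunk a little further up replaces a store of a floating-point constant with a store of its integer bit pattern, falling back to two 32-bit stores for an f64 when single 64-bit stores are not legal. A hedged sketch of the same transformation in ordinary C++; the helper name and the little-endian word order are assumptions for the example:

#include <cstdint>
#include <cstring>

// Store a double constant as raw integer bits: one 64-bit store when
// available, otherwise the low word at offset 0 and the high word at +4
// (swapped on big-endian targets).
void store_f64_constant(void *dst, double c, bool have_i64_stores) {
  uint64_t bits;
  std::memcpy(&bits, &c, sizeof(bits));      // bitcastToAPInt() analogue
  if (have_i64_stores) {
    std::memcpy(dst, &bits, sizeof(bits));   // single 64-bit store
    return;
  }
  uint32_t lo = static_cast<uint32_t>(bits);
  uint32_t hi = static_cast<uint32_t>(bits >> 32);
  auto *p = static_cast<unsigned char *>(dst);
  std::memcpy(p, &lo, sizeof(lo));           // Lo store
  std::memcpy(p + 4, &hi, sizeof(hi));       // Hi store at offset +4
}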
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { + static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); // Recursively Legalize all of the inputs of the call end that do not lead @@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1, LastCALLSEQ_END); Tmp1 = LegalizeOp(Tmp1); @@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + + SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; + depth++; // Legalize the call, starting from the CALLSEQ_END. LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + depth--; + assert(depth >= 0 && "Un-matched CALLSEQ_START?"); + if (depth > 0) + LastCALLSEQ_END = Saved_LastCALLSEQ_END; return Result; } case ISD::CALLSEQ_END: @@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. // Do not try to legalize the target-specific arguments (#1+), except for // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){ + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ if (Tmp1 != Node->getOperand(0)) { SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; @@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getResNo()); } } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); // This finishes up call legalization. - IsLegalizingCall = false; - // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Change base type to a different vector type. EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); - Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), + Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; } @@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 0), Tmp3); AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? 
Tmp4 : Tmp3; - } else { - EVT SrcVT = LD->getMemoryVT(); - unsigned SrcWidth = SrcVT.getSizeInBits(); - int SVOffset = LD->getSrcValueOffset(); - unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - - if (SrcWidth != SrcVT.getStoreSizeInBits() && - // Some targets pretend to have an i1 loading operation, and actually - // load an i8. This trick is correct for ZEXTLOAD because the top 7 - // bits are guaranteed to be zero; it helps the optimizers understand - // that these bits are zero. It is also useful for EXTLOAD, since it - // tells the optimizers that those bits are undefined. It would be - // nice to have an effective generic way of getting these benefits... - // Until such a way is found, don't insist on promoting i1 here. - (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { - // Promote to a byte-sized load if not loading an integral number of - // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. - unsigned NewWidth = SrcVT.getStoreSizeInBits(); - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); - SDValue Ch; - - // The extra bits are guaranteed to be zero, since we stored them that - // way. A zext load from NVT thus automatically gives zext from SrcVT. - - ISD::LoadExtType NewExtType = - ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - - Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, isNonTemporal, Alignment); - - Ch = Result.getValue(1); // The chain. - - if (ExtType == ISD::SEXTLOAD) - // Having the top bits zero doesn't help when sign extending. - Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) - // All the top bits are guaranteed to be zero - inform the optimizers. - Result = DAG.getNode(ISD::AssertZext, dl, - Result.getValueType(), Result, - DAG.getValueType(SrcVT)); - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else if (SrcWidth & (SrcWidth - 1)) { - // If not loading a power-of-2 number of bits, expand as two loads. - assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned RoundWidth = 1 << Log2_32(SrcWidth); - assert(RoundWidth < SrcWidth); - unsigned ExtraWidth = SrcWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Load size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi, Ch; - unsigned IncrementSize; + } - if (TLI.isLittleEndian()) { - // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) - // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. 
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. + assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + + if (TLI.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. 
+ IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, Alignment); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(IncrementSize)); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Tmp1, Tmp2, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + + // Join the hi and lo parts. + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); + } else { + switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); + + if (isCustom) { + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); + } } else { - // Big endian - avoid unaligned loads. - // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 - // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - isNonTemporal, Alignment); - - // Load the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - Node->getValueType(0), dl, Tmp1, Tmp2, - LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - - // Build a factor node to remember that this load is independent of - // the other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Move the top bits to the right place. 
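The non-power-of-2 extending-load expansion above (EXTLOAD:i24 becomes a 16-bit load plus an 8-bit load, shifted and ORed together) has a straightforward scalar counterpart. A plain C++ sketch of the little-endian case; the width choices and the function name are illustrative:

#include <cstdint>
#include <cstring>

// Zero-extending load of a 24-bit little-endian value: a 16-bit load of the
// bottom RoundWidth bits plus an 8-bit load of the remaining ExtraWidth bits,
// with the high piece shifted into place and ORed in.
uint32_t load_u24_le(const unsigned char *p) {
  uint16_t lo16;
  std::memcpy(&lo16, p, sizeof(lo16));      // bottom 16 bits
  uint8_t hi8 = p[2];                       // remaining 8 bits at offset +2
  return static_cast<uint32_t>(lo16) |
         (static_cast<uint32_t>(hi8) << 16);
}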
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - - // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); - } - - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); - } else { - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - isCustom = true; - // FALLTHROUGH - case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); - - if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); - } - } else { - // If this is an unaligned load and the target doesn't support it, - // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getTargetData()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); - } + // If this is an unaligned load and the target doesn't support it, + // expand it. + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); + if (LD->getAlignment() < ABIAlignment){ + Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); } } - break; - case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); - break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); - } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. - Tmp2 = LegalizeOp(Load.getValue(1)); + } + break; + case TargetLowering::Expand: + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) { + SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + unsigned ExtendOp; + switch (ExtType) { + case ISD::EXTLOAD: + ExtendOp = (SrcVT.isFloatingPoint() ? + ISD::FP_EXTEND : ISD::ANY_EXTEND); break; + case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; + default: llvm_unreachable("Unexpected extend load type!"); } - // FIXME: This does not work for vectors on most targets. Sign- and - // zero-extend operations are currently folded into extending loads, - // whether they are legal or not, and then we end up here without any - // support for legalizing them. 
- assert(ExtType != ISD::EXTLOAD && - "EXTLOAD should always be supported!"); - // Turn the unsupported load into an EXTLOAD followed by an explicit - // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, - Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); - SDValue ValRes; - if (ExtType == ISD::SEXTLOAD) - ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, - Result.getValueType(), - Result, DAG.getValueType(SrcVT)); - else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. + Tmp2 = LegalizeOp(Load.getValue(1)); break; } + // FIXME: This does not work for vectors on most targets. Sign- and + // zero-extend operations are currently folded into extending loads, + // whether they are legal or not, and then we end up here without any + // support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an explicit + // zero/sign extend inreg. + Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + break; } - - // Since loads produce two values, make sure to remember that we legalized - // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; } case ISD::STORE: { StoreSDNode *ST = cast<StoreSDNode>(Node); Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
- int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Promote: assert(VT.isVector() && "Unknown legal promote case!"); - Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl, + Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getSrcValue(), SVOffset, isVolatile, + ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); break; } @@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. @@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); - Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, isNonTemporal, - Alignment); + Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset + IncrementSize, ExtraVT, isVolatile, - isNonTemporal, + Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, isNonTemporal, - Alignment); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); break; } } @@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), + false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, Idx.getValueType())); + + if (Idx.getValueType().bitsGT(TLI.getPointerTy())) + Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx); else - return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType(), - false, false, 0); + Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. 
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; @@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // element type, only store the bits necessary. if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), EltVT, false, false, 0)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), false, false, 0)); } @@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); if (isTypeLegal(IVT)) { // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2); + SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { // Store the float to memory, then load the sign part out as an integer. MVT LoadTy = TLI.getPointerTy(); @@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); // Then store the float to it. SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0, + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), false, false, 0); if (TLI.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), + false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0); + SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), + false, false, 0); // Move the sign bit to the top bit of the loaded integer. 
unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align > StackAlign) SP = DAG.getNode(ISD::AND, dl, VT, SP, DAG.getConstant(-(uint64_t)Align, VT)); @@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); int SPFI = StackPtrFI->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, false, SrcAlign); + PtrInfo, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, false, SrcAlign); + PtrInfo, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, - DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, + false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, - false, false, DestAlign); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, + PtrInfo, SlotVT, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), Node->getValueType(0).getVectorElementType(), false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); } @@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); } @@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // Splice the libcall in wherever FindInputOutputChains tells us to. 
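The ExpandFCOPYSIGN hunk a little further up reaches the sign of the second operand by reinterpreting it as a same-width integer, or, when no such integer type is legal, by storing the float and loading the word that holds the sign bit. Below is a rough bit-level sketch of what copysign computes, using the same reinterpret-as-integer trick; it is an illustration, not the DAG expansion itself:

#include <cstdint>
#include <cstring>

// copysign(x, y) by bit manipulation: magnitude bits of x, sign bit of y.
double copysign_bits(double x, double y) {
  uint64_t xb, yb;
  std::memcpy(&xb, &x, sizeof(xb));   // BITCAST x to i64
  std::memcpy(&yb, &y, sizeof(yb));   // BITCAST y to i64
  const uint64_t SignBit = UINT64_C(1) << 63;
  uint64_t rb = (xb & ~SignBit) | (yb & SignBit);
  double r;
  std::memcpy(&r, &rb, sizeof(r));
  return r;
}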
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + bool isTailCall = isInTailCallPosition(DAG, Node, TLI); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), isTailCall, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). @@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); @@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0, + Op0Mapped, Lo, MachinePointerInfo(), false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, - false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, + MachinePointerInfo(), + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, - false, false, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, + MachinePointerInfo(), false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, DAG.getConstant(32, MVT::i64)); SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); - SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); - SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. This implementation has the - // advantage of performing rounding correctly. + // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. 
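The i64-to-f64 branch shown above assembles the result from two doubles whose exponents are pinned at 2^52 and 2^84: ORing the low and high 32-bit halves of the integer into those bit patterns and then subtracting the combined bias reconstructs the value with a single final rounding. A self-contained C++ rendering of that trick; the spelled-out constants are derived here for illustration rather than copied from the patch:

#include <cstdint>
#include <cstring>

// uint64 -> double without a 64-bit signed conversion: plant the halves in
// doubles pinned at 2^52 and 2^84, then let one FP subtract and one FP add
// reassemble (and correctly round) the value.
double u64_to_f64(uint64_t x) {
  uint64_t lo_bits = UINT64_C(0x4330000000000000) | (x & 0xffffffffu); // 2^52 + lo
  uint64_t hi_bits = UINT64_C(0x4530000000000000) | (x >> 32);         // 2^84 + hi * 2^32
  uint64_t bias_bits = UINT64_C(0x4530000000100000);                   // 2^84 + 2^52
  double lo, hi, bias;
  std::memcpy(&lo, &lo_bits, sizeof(lo));     // BITCAST i64 -> f64
  std::memcpy(&hi, &hi_bits, sizeof(hi));
  std::memcpy(&bias, &bias_bits, sizeof(bias));
  return lo + (hi - bias);                    // FSUB then FADD, as in the DAG code
}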
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + if (!isSigned) { + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + + SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + //pseudo-op, or, even better, for whole-function isel. + SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT); + return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast); + } + + // Otherwise, implement the fully general conversion. EVT SHVT = TLI.getShiftAmountTy(); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, @@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), - ISD::SETUGE); + ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, @@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, DAG.getIntPtrConstant(0)); - } SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); @@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, + MachinePointerInfo::getConstantPool(), MVT::f32, false, false, Alignment)); } @@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } +/// SplatByte - Distribute ByteVal over NumBits bits. +// FIXME: Move this helper to a common place. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// ExpandBitCount - Expand the specified bitcount instruction into operations. 
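The new unsigned i64-to-f32 path above follows the compiler_rt algorithm its comment cites: when the sign bit would confuse a signed conversion, halve the value while folding the shifted-out bit back in so rounding still sees it, convert the halved value as signed, and double the result. The same algorithm as a standalone C++ function; the branch here stands in for the SELECT the DAG builds:

#include <cstdint>

// uint64 -> float using only a signed int64 -> float conversion.
float u64_to_f32(uint64_t x) {
  if ((x >> 63) == 0)                        // top bit clear: plain signed conversion
    return static_cast<float>(static_cast<int64_t>(x));
  uint64_t halved = (x >> 1) | (x & 1);      // halve, keeping the low bit sticky
  float f = static_cast<float>(static_cast<int64_t>(halved));
  return f + f;                              // undo the halving
}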
/// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, switch (Opc) { default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { - static const uint64_t mask[6] = { - 0x5555555555555555ULL, 0x3333333333333333ULL, - 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, - 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL - }; EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(); - unsigned len = VT.getSizeInBits(); - for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { - //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) - unsigned EltSize = VT.isVector() ? - VT.getVectorElementType().getSizeInBits() : len; - SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT); - SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); - Op = DAG.getNode(ISD::ADD, dl, VT, - DAG.getNode(ISD::AND, dl, VT, Op, Tmp2), - DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3), - Tmp2)); - } + unsigned Len = VT.getSizeInBits(); + + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); + SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); + SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); + SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + Op = DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, ShVT)); + return Op; } case ISD::CTLZ: { @@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::PREFETCH: case ISD::VAEND: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; @@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), @@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // By default, atomic intrinsics are marked Legal and lowered. 
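The rewritten CTPOP expansion above is the classic parallel bit count from the Bit Twiddling Hacks page cited in the comment, with SplatByte building the 0x55.., 0x33.., 0x0F.. and 0x01.. masks for whatever width is being legalized. For a fixed 64-bit width the same computation looks like this in plain C++:

#include <cstdint>

// Parallel popcount for 64 bits: pairwise sums, then nibble sums, then one
// multiply by 0x0101... gathers the per-byte counts into the top byte.
unsigned popcount64(uint64_t v) {
  v = v - ((v >> 1) & UINT64_C(0x5555555555555555));
  v = (v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333));
  v = (v + (v >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F);
  return static_cast<unsigned>((v * UINT64_C(0x0101010101010101)) >> 56);  // Len - 8 = 56
}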
Targets // which don't support them directly, however, may want libcalls, in which // case they mark them Expand, and we get here. - // FIXME: Unimplemented for now. Add libcalls. case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TargetLowering::ArgListTy Args; std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); @@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::FP_ROUND: - case ISD::BIT_CONVERT: + case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - const uint64_t zero[] = {0, 0}; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); @@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TLI.getPointerTy())); VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, - DAG.getConstant(-Align, + DAG.getConstant(-(int64_t)Align, TLI.getPointerTy())); } @@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, - false, false, 0); + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, + MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), false, false, 0)); Results.push_back(Results[0].getValue(1)); break; @@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, - false, false, 0); + Node->getOperand(2), MachinePointerInfo(VS), + false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); break; } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This 
must be an access of the only element. Return it. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_SUBVECTOR: Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; case ISD::CONCAT_VECTORS: { Results.push_back(ExpandVectorBuildThroughStack(Node)); break; @@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else { - // FIXME: We should be able to fall back to a libcall with an illegal - // type in some cases. - // Also, we can fall back to a division in some cases, but that's a big - // performance hit in the general case. - assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2)) && - "Don't know how to expand this operation yet!"); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2))) { EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); @@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, DAG.getIntPtrConstant(1)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. 
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + SDValue Ret = ExpandLibCall(LC, Node, isSigned); + BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); + TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, + DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { @@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::XOR: { unsigned ExtOp, TruncOp; if (OVT.isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else { assert(OVT.isInteger() && "Cannot promote logic operation"); ExtOp = ISD::ANY_EXTEND; @@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { - ExtOp = ISD::BIT_CONVERT; - TruncOp = ISD::BIT_CONVERT; + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; } else if (Node->getValueType(0).isInteger()) { ExtOp = ISD::ANY_EXTEND; TruncOp = ISD::TRUNCATE; @@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, cast<ShuffleVectorSDNode>(Node)->getMask(Mask); // Cast the two input vectors. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); // Convert the shuffle mask to the right # elements. 
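The widened-multiply overflow expansion above, including its new libcall fallback, relies on one property: the multiplication overflows exactly when the high half of the double-width product is not just the sign- or zero-extension of the low half. A small C++ illustration for the signed 32-bit case; the function name is made up:

#include <cstdint>

// Overflow-checked 32-bit signed multiply via a widening 64-bit multiply.
// The low half is the result; overflow happened unless the high half equals
// the sign-extension of that low half.
bool smul32_overflow(int32_t a, int32_t b, int32_t &result) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  int32_t bottom = static_cast<int32_t>(wide);     // low half
  int32_t top = static_cast<int32_t>(wide >> 32);  // high half
  result = bottom;
  int32_t sign_fill = bottom < 0 ? -1 : 0;         // expected high half
  return top != sign_fill;
}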
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 650ee5a0721c..27752123aac4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); @@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SetSoftenedFloat(SDValue(N, ResNo), R); } -SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) - SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1), - NVT); + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size-1); + SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); } @@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), NVT, + L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. 
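In the SoftenFloatRes_FABS hunk above, the mask is now built with APInt::clearBit instead of the removed chaining clear(): an all-ones value with the sign bit cleared, AND-ed with the float's integer bit pattern, is the entire soft-float fabs. A small host-side illustration of the same trick, assuming IEEE-754 single precision (the function name is ours, not LLVM's):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // fabs on the integer bit pattern: Mask = ~(1 << (Size-1)) clears only
    // the sign bit and preserves exponent and mantissa.
    static float soft_fabs(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));        // "bitcast" float -> i32
      const uint32_t mask = ~(UINT32_C(1) << 31);  // all ones, bit 31 cleared
      bits &= mask;
      std::memcpy(&x, &bits, sizeof(bits));        // "bitcast" i32 -> float
      return x;
    }

    int main() {
      assert(soft_fabs(-2.5f) == 2.5f);
      assert(soft_fabs(2.5f) == 2.5f);
      assert(soft_fabs(-0.0f) == 0.0f);
      return 0;
    }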
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), - L->getBasePtr(), L->getOffset(), - L->getSrcValue(), L->getSrcValueOffset(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to soften this operator's operand!"); - case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; @@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } -SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } @@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, - LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), LD->isVolatile(), + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. 
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { GetExpandedOp(ST->getValue(), Lo, Hi); return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getSrcValue(), ST->getSrcValueOffset(), + ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f8c589071921..f0752df80f12 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, N->getSrcValue(), N->getAlignment()); + Op2, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op3 = GetPromotedInteger(N->getOperand(3)); SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), N->getChain(), N->getBasePtr(), - Op2, Op3, N->getSrcValue(), N->getAlignment()); + Op2, Op3, N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); @@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { case PromoteInteger: if (NOutVT.bitsEq(NInVT)) // The input promotes to the same size. Convert the promoted value. - return DAG.getNode(ISD::BIT_CONVERT, dl, - NOutVT, GetPromotedInteger(InOp)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); break; case SoftenFloat: // Promote the integer operand by hand. @@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, BitConvertToInteger(GetScalarizedVector(InOp))); case SplitVector: { - // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split + // For example, i32 = BITCAST v2i16 on alpha. 
Convert the split // pieces of the input into integers and reassemble in the final type. SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); @@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); - return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case WidenVector: if (OutVT.bitsEq(NInVT)) // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); + return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // value was zero. This can be handled by setting the bit just off // the top of the original type. APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.set(OVT.getSizeInBits()); + TopBit.setBit(OVT.getSizeInBits()); Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); return DAG.getNode(ISD::CTTZ, dl, NVT, Op); } @@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getPointerInfo(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the input + // to the larger type, do the multiply, then check the high bits of the result + // to see if the overflow happened. + if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS); + + // Overflow occurred iff the high part of the result does not zero/sign-extend + // the low part. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred iff the high part is non-zero. + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getIntPtrConstant(SmallVT.getSizeInBits())); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, Hi.getValueType()), ISD::SETNE); + } else { + // Signed overflow occurred iff the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. 
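PromoteIntRes_CTTZ, a few hunks up, keeps the same trick while switching from APInt::set to APInt::setBit: after promotion the extra high bits are all zero, so a count-trailing-zeros of an original value of zero would run past the original width; OR-ing in the bit just above that width caps the count without disturbing any lower bit. A host-side sketch of the idea, assuming an i8 value promoted to i32 (the portable loop stands in for the CTTZ node):

    #include <cassert>
    #include <cstdint>

    // cttz of an i8 value evaluated in the promoted i32 type.  The i8
    // semantics want cttz(0) == 8, so set bit 8 (just above the original
    // width) before counting.
    static unsigned cttz8_promoted(uint8_t v) {
      uint32_t wide = (uint32_t)v | (UINT32_C(1) << 8);  // TopBit.setBit(8)
      unsigned n = 0;
      while ((wide & 1) == 0) { wide >>= 1; ++n; }       // always terminates
      return n;
    }

    int main() {
      assert(cttz8_promoted(0x01) == 0);
      assert(cttz8_promoted(0x28) == 3);  // 0b101000
      assert(cttz8_promoted(0x00) == 8);  // capped by the inserted bit
      return 0;
    }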
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); @@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { - assert(ResNo == 1 && "Only boolean result promotion currently supported!"); - return PromoteIntRes_Overflow(N); -} - //===----------------------------------------------------------------------===// // Integer Operand Promotion //===----------------------------------------------------------------------===// @@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; - case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; @@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); } -SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { // This should only occur in unusual situations like bitcasting to an // x86_fp80, so just turn it into a store+load return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); @@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), - SVOffset, N->getMemoryVT(), + return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), + N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); } @@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; @@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + case ISD::AND: case ISD::OR: case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; @@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); } +/// Lower an atomic node to the appropriate builtin call. 
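The ISD::ATOMIC_* result-expansion cases added above hand the node to ExpandAtomic, defined next, which picks a runtime routine from the opcode and the memory width and calls it through ExpandChainLibCall. The RTLIB::SYNC_* entries follow the usual __sync_<operation>_<size> naming convention; purely as an illustration of that mapping (the strings below are the conventional names, not values read out of the RTLIB table):

    #include <cassert>
    #include <cstdio>
    #include <string>

    // Build the __sync_* symbol name selected for an atomic operation and an
    // access width of 1, 2, 4 or 8 bytes, e.g. ("fetch_and_add", 4) ->
    // "__sync_fetch_and_add_4".
    static std::string syncLibcallName(const std::string &op, unsigned bytes) {
      char buf[64];
      std::snprintf(buf, sizeof(buf), "__sync_%s_%u", op.c_str(), bytes);
      return std::string(buf);
    }

    int main() {
      assert(syncLibcallName("fetch_and_add", 4) == "__sync_fetch_and_add_4");
      assert(syncLibcallName("lock_test_and_set", 8) ==
             "__sync_lock_test_and_set_8");
      assert(syncLibcallName("val_compare_and_swap", 2) ==
             "__sync_val_compare_and_swap_2");
      return 0;
    }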
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + /// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, /// and the shift amount is a constant 'Amt'. Expand the operation. 
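ExpandShiftByConstant, whose body follows, rewrites a shift of the expanded (double-width) value as shifts and ORs on its two halves, with special cases when the amount reaches or exceeds the half width or the full width, plus the X+X form for shifting left by 1 when ADDC/ADDE are available. The general case for SHL and SRL, sketched with 32-bit halves (helper names are ours; amt must stay strictly between 0 and 32 so no single shift is by the full width):

    #include <cassert>
    #include <cstdint>

    // Shift a 64-bit value held as (hi, lo) 32-bit halves left by a constant
    // 0 < amt < 32:  Hi = (InH << Amt) | (InL >> (N-Amt)),  Lo = InL << Amt.
    static void shl64(uint32_t &hi, uint32_t &lo, unsigned amt) {
      hi = (hi << amt) | (lo >> (32 - amt));
      lo = lo << amt;
    }

    // Logical right shift:  Lo = (InL >> Amt) | (InH << (N-Amt)),  Hi = InH >> Amt.
    static void srl64(uint32_t &hi, uint32_t &lo, unsigned amt) {
      lo = (lo >> amt) | (hi << (32 - amt));
      hi = hi >> amt;
    }

    int main() {
      uint32_t hi = 0x00000001, lo = 0x80000000;  // the value 0x0000000180000000
      shl64(hi, lo, 4);
      assert(hi == 0x00000018 && lo == 0x00000000);
      srl64(hi, lo, 4);                            // shift back to the original
      assert(hi == 0x00000001 && lo == 0x80000000);
      return 0;
    }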
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { Lo = DAG.getConstant(0, NVT); - Hi = DAG.getNode(ISD::SHL, dl, - NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, NVT); Hi = InL; @@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, TLI.isOperationLegalOrCustom(ISD::ADDC, TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); } else { - Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy)); - Hi = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SHL, dl, NVT, InH, + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SRL, dl, NVT, InL, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(NVTBits-Amt, ShTy))); } return; @@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Lo = DAG.getConstant(0, NVT); Hi = DAG.getConstant(0, NVT); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRL, dl, + Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); Hi = DAG.getConstant(0, NVT); } else if (Amt == NVTBits) { Lo = InH; Hi = DAG.getConstant(0, NVT); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } return; } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); if (Amt > VTBits) { - Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt > NVTBits) { - Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt-NVTBits, ShTy)); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else if (Amt == NVTBits) { Lo = InH; - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits-1, ShTy)); } else { - Lo = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)), - DAG.getNode(ISD::SHL, dl, NVT, InH, + DAG.getNode(ISD::SHL, DL, NVT, InH, 
DAG.getConstant(NVTBits-Amt, ShTy))); - Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); } } @@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate - // a carry of type MVT::Flag, but there doesn't seem to be any way to + // a carry of type MVT::Glue, but there doesn't seem to be any way to // generate a value of this type in the expanded code sequence. bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? @@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { - SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); HiOps[2] = Lo.getValue(1); @@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], - ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], - ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); - } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), - LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); - } + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = 
DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue(); - Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); } @@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. - Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. 
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } +void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Ret, Lo, Hi); + + // Now calculate overflow. + SDValue Ofl; + if (N->getOpcode() == ISD::UMULO) + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, + DAG.getConstant(0, VT), ISD::SETNE); + else { + SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); + Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); + Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); + } + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to expand this operator's operand!"); - case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; @@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); @@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); - return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, Alignment); - } else if (TLI.isLittleEndian()) { + } + + if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = @@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, NEVT, - isVolatile, isNonTemporal, + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } else { - // Big-endian - high bits are at low addresses. Favor aligned stores at - // the cost of some bit-fiddling. - GetExpandedInteger(N->getValue(), Lo, Hi); - - EVT ExtVT = N->getMemoryVT(); - unsigned EBytes = ExtVT.getStoreSize(); - unsigned IncrementSize = NVT.getSizeInBits()/8; - unsigned ExcessBits = (EBytes - IncrementSize)*8; - EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), - ExtVT.getSizeInBits() - ExcessBits); + } - if (ExcessBits < NVT.getSizeInBits()) { - // Transfer high bits from the top of Lo to the bottom of Hi. - Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - ExcessBits, - TLI.getPointerTy())); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, - TLI.getPointerTy()))); - } + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. + Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + } - // Store both the high bits and maybe some of the low bits. - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, isNonTemporal, - Alignment); + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), + HiVT, isVolatile, isNonTemporal, Alignment); - // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - // Store the lowest ExcessBits bits in the second half. - Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), - SVOffset+IncrementSize, - EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - } + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Store the lowest ExcessBits bits in the second half. 
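The big-endian branch of ExpandIntOp_STORE, un-nested above, favors an aligned full-width store at the cost of the advertised bit-fiddling: when the memory type is not a multiple of the expanded width, the top of Lo is shifted into the bottom of Hi, that full word is stored first, and only the lowest ExcessBits go out at the incremented pointer. A worked sketch for an i48 value expanded into i32 halves (the widths and helper name are chosen for illustration):

    #include <cassert>
    #include <cstdint>

    // Big-endian expanded store of an i48 held as Lo = bits [31:0] and
    // Hi = bits [47:32].  ExcessBits = (6 - 4) * 8 = 16, so the stored words
    // are bits [47:16] (4 bytes at Ptr) and bits [15:0] (2 bytes at Ptr+4).
    static void store_i48_be(uint64_t v, uint32_t &word0, uint16_t &word1) {
      uint32_t lo = (uint32_t)v;          // bits [31:0]
      uint32_t hi = (uint32_t)(v >> 32);  // bits [47:32] in its low half
      hi = (hi << 16) | (lo >> 16);       // transfer top of Lo into bottom of Hi
      word0 = hi;                         // truncating store of HiVT = i32
      word1 = (uint16_t)lo;               // lowest ExcessBits in the second half
    }

    int main() {
      uint32_t w0; uint16_t w1;
      store_i48_be(0x0000123456789ABCULL, w0, w1);
      assert(w0 == 0x12345678u);  // bits [47:16]
      assert(w1 == 0x9ABCu);      // bits [15:0]
      return 0;
    }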
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { @@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), - FudgePtr, NULL, 0, MVT::f32, + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + FudgePtr, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 6e56c98e9b56..cedda7e7075a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { if (M->getNodeId() == Processed) RemapValue(NewVal); DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // OldVal may be a target of the ReplacedValues map which was marked + // NewNode to force reanalysis because it was updated. Ensure that + // anything that ReplacedValues mapped to OldVal will now be mapped + // all the way to NewVal. + ReplacedValues[OldVal] = NewVal; } // The original node continues to exist in the DAG, marked NewNode. } @@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { /// BitConvertToInteger - Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } @@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); - return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), + return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(), EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } @@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), + false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + return CallInfo; +} + /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d56029208e61..3f81bbbe4061 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -99,7 +99,7 @@ private: return SoftenFloat; return ExpandFloat; } - + if (VT.getVectorNumElements() == 1) return ScalarizeVector; return SplitVector; @@ -192,6 +192,10 @@ private: SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); @@ -244,7 +248,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); - SDValue PromoteIntRes_BIT_CONVERT(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); @@ -278,7 +282,7 @@ private: // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); - SDValue PromoteIntOp_BIT_CONVERT(SDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); @@ -344,6 +348,7 @@ private: void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -352,7 +357,7 @@ private: // Integer Operand Expansion. 
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BIT_CONVERT(SDNode *N); + SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); @@ -387,7 +392,7 @@ private: // Result Float to Integer Conversion. void SoftenFloatResult(SDNode *N, unsigned OpNo); - SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); @@ -426,7 +431,7 @@ private: // Operand Float to Integer Conversion. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); - SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N); + SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); @@ -515,7 +520,7 @@ private: SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_InregOp(SDNode *N); - SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); @@ -532,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); - SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -557,7 +562,7 @@ private: void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -577,11 +582,12 @@ private: bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); - SDValue SplitVecOp_BIT_CONVERT(SDNode *N); + SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -603,7 +609,7 @@ private: // Widen Vector Result Promotion. void WidenVectorResult(SDNode *N, unsigned ResNo); - SDValue WidenVecRes_BIT_CONVERT(SDNode* N); + SDValue WidenVecRes_BITCAST(SDNode* N); SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); @@ -628,7 +634,7 @@ private: // Widen Vector Operand. bool WidenVectorOperand(SDNode *N, unsigned ResNo); - SDValue WidenVecOp_BIT_CONVERT(SDNode *N); + SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -721,7 +727,7 @@ private: } // Generic Result Expansion. 
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -729,7 +735,7 @@ private: void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); // Generic Operand Expansion. - SDValue ExpandOp_BIT_CONVERT (SDNode *N); + SDValue ExpandOp_BITCAST (SDNode *N); SDValue ExpandOp_BUILD_VECTOR (SDNode *N); SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 9c2b1d9ed73d..a75ae87f3cbe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -32,8 +32,7 @@ using namespace llvm; // little/big-endian machines, followed by the Hi/Lo part. This means that // they cannot be used as is on vectors, for which Lo is always stored first. -void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); @@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case SoftenFloat: // Convert the integer operand instead. SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ExpandInteger: case ExpandFloat: // Convert the expanded pieces of the input. GetExpandedOp(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case SplitVector: GetSplitVector(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case ScalarizeVector: // Convert the element instead. 
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case WidenVector: { - assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); @@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(InNVT.getVectorNumElements())); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; } } if (InVT.isVector() && OutVT.isInteger()) { - // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand + // Handle cases like i64 = BITCAST v1i64 on x86, where the operand // is legal but the result is not. EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { - SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); + SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp, @@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the second half from the stack slot. - Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. 
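When none of the in-register cases apply, ExpandRes_BITCAST (in the hunk above) goes through memory: store the operand to a stack temporary described by a MachinePointerInfo, load the first half at offset 0, then load the second half at IncrementSize. The same through-memory bitcast written as host C++, splitting an f64 into two 32-bit words (the types and names are only an example):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // "Bitcast" a double to two 32-bit halves via a local buffer, the way the
    // legalizer uses a stack slot when the value cannot be split in registers.
    static void split_f64(double d, uint32_t &w0, uint32_t &w1) {
      uint8_t slot[8];
      std::memcpy(slot, &d, 8);       // store to the "stack slot"
      std::memcpy(&w0, slot, 4);      // load the first half (offset 0)
      std::memcpy(&w1, slot + 4, 4);  // load the second half (offset 4)
    }

    int main() {
      uint32_t w0, w1;
      split_f64(1.0, w0, w1);
      // Reassemble the halves and check we get the original bits back; which
      // half holds the exponent depends on the host's endianness.
      uint8_t slot[8];
      std::memcpy(slot, &w0, 4);
      std::memcpy(slot + 4, &w1, 4);
      uint64_t bits, expect;
      std::memcpy(&bits, slot, 8);
      double d = 1.0;
      std::memcpy(&expect, &d, 8);
      assert(bits == expect);
      return 0;
    }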
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT OldVT = N->getValueType(0); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); @@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), - SVOffset+IncrementSize, + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// -SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on - // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), @@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { std::swap(Parts[0], Parts[1]); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } } @@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { @@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back. 
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, NewVecVT, N->getOperand(0)); SDValue Lo, Hi; @@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. - return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); } SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { @@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); @@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), - SVOffset + IncrementSize, + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 621c08724210..167dbe0377b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op); + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 93bc2d04928e..182f8fcbfbf3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to scalarize the result of this operator!"); - case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; @@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT 
= N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NewVT, N->getOperand(0)); } @@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), - N->getSrcValue(), N->getSrcValueOffset(), + N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); @@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"; #endif llvm_unreachable("Do not know how to scalarize this operator's operand!"); - case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { return false; } -/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs /// to be scalarized, it must be <1 x ty>. Convert the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), Elt); } @@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ if (N->isTruncatingStore()) return DAG.getTruncStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), - N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), + N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), N->getOriginalAlignment()); } @@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; @@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); } -void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. 
EVT LoVT, HiVT; @@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, GetExpandedOp(InOp, Lo, Hi); if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } break; @@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // If the input is a vector that needs to be split, convert each split // piece of the input now. GetSplitVector(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; } @@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, @@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0)); VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); break; } } @@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); - Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, - DAG.getConstant(LoVT.getVectorNumElements(), IdxVT)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. @@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); // Load the Lo part from the stack slot. 
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), false, false, 0); // Increment the pointer to the other part. @@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - false, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ch = LD->getChain(); SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - const Value *SV = LD->getSrcValue(); - int SVOffset = LD->getSrcValueOffset(); EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); @@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { #endif llvm_unreachable("Do not know how to split this operator's operand!"); - case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::FP_EXTEND: + case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: @@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } -SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { - // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will // end up being split all the way down to individual components. Convert the // split pieces into integers and reassemble. 
SDValue Lo, Hi; @@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), JoinIntegers(Lo, Hi)); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { - // We know that the extracted result type is legal. For now, assume the index - // is a constant. + // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { EVT EltVT = VecVT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, - false, false, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, - SV, 0, EltVT, false, false, 0); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo(), EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - int SVOffset = N->getSrcValueOffset(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); @@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, isVol, isNT, Alignment); else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), isVol, isNT, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVol, isNT, Alignment); else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); @@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { DebugLoc DL = N->getDebugLoc(); - + // The input operands all must have the same type, and we know the result the // result type is valid. Convert this to a buildvector which extracts all the // input elements. 
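The common thread in the LegalizeVectorTypes hunks above is that split loads and stores now describe their memory operands with MachinePointerInfo, deriving the high half's info with getWithOffset() instead of hand-maintaining an SVOffset integer. Below is a condensed sketch of the lo/hi store split using only calls that appear in the patch; it is not a standalone program and assumes the locals (Ch, Ptr, Lo, Hi, LoMemVT, DL, Alignment, isVol, isNT, and the StoreSDNode *N) are set up as in the SplitVecOp_STORE hunk.

    // Low half keeps the node's original pointer info.
    unsigned IncrementSize = LoMemVT.getSizeInBits() / 8;
    SDValue LoStore = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
                                   isVol, isNT, Alignment);

    // High half: advance the pointer and derive the pointer info by offset,
    // rather than tracking a separate SVOffset variable alongside it.
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                      DAG.getIntPtrConstant(IncrementSize));
    SDValue HiStore = DAG.getStore(Ch, DL, Hi, Ptr,
                                   N->getPointerInfo().getWithOffset(IncrementSize),
                                   isVol, isNT, MinAlign(Alignment, IncrementSize));

    // Chain the two halves so later users depend on both stores.
    SDValue Result = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);

Keeping the offset inside MachinePointerInfo rather than in an ad-hoc integer lets later passes see a consistent description of which part of the original memory location each half touches.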
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), &Elts[0], Elts.size()); } +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + DebugLoc DL = N->getDebugLoc(); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + + //===----------------------------------------------------------------------===// // Result Vector Widening @@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen the result of this operator!"); - case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; @@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { + while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); } - + // No legal vector version so unroll the vector operation and then widen. if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - + // Since the operation can trap, apply operation on the original vector. EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); @@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned ConcatEnd = 0; // Current ConcatOps index. int Idx = 0; // Current Idx into input vectors. - // NumElts := greatest synthesizable vector size (at most WidenVT) + // NumElts := greatest legal vector size (at most WidenVT) // while (orig. 
vector has unhandled elements) { // take munches of size NumElts from the beginning and add to ConcatOps // NumElts := next smaller supported vector size or 1 @@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); @@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { do { NextSize *= 2; NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeSynthesizable(NextVT)); + } while (!TLI.isTypeLegal(NextVT)); if (!VT.isVector()) { // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT @@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (VT == WidenVT) return ConcatOps[0]; } - + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { @@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getNode(Opcode, dl, WidenVT, InOp); + if (InVTNumElts == WidenNumElts) { + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InOp); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + } } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, - &Ops[0], NumConcat)); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, + &Ops[0], NumConcat); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVec); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); } if (InVTNumElts % WidenNumElts == 0) { + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, + InOp, DAG.getIntPtrConstant(0)); // Extract the input and convert the shorten input vector. 
- return DAG.getNode(Opcode, dl, WidenVT, - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, - InOp, DAG.getIntPtrConstant(0))); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVal); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); } } @@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; - for (i=0; i < MinElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + for (i=0; i < MinElts; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getIntPtrConstant(i)); + if (N->getNumOperands() == 1) + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); + else + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } -SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); @@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InOp = GetPromotedInteger(InOp); InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; case SoftenFloat: case ExpandInteger: @@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { InVT = InOp.getValueType(); if (WidenVT.bitsEq(InVT)) // The input widens to the same size. Convert to the widen value. - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); break; } unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); - if (WidenSize % InSize == 0) { + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. 
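The WidenVecRes_Convert hunks above repeat one pattern: after widening or extracting the input, the conversion node is rebuilt with either one operand or two, so that conversions such as FP_ROUND keep their extra constant operand. A small illustrative helper capturing that dispatch; the name RebuildConvert is hypothetical, and only the DAG.getNode calls come from the patch.

    // Rebuild a conversion with a new (widened or extracted) input operand,
    // forwarding the optional second operand (e.g. FP_ROUND's truncation flag).
    static SDValue RebuildConvert(SelectionDAG &DAG, SDNode *N, DebugLoc DL,
                                  EVT DestVT, SDValue NewInput) {
      unsigned Opcode = N->getOpcode();
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, DestVT, NewInput);
      return DAG.getNode(Opcode, DL, DestVT, NewInput, N->getOperand(1));
    }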
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeSynthesizable(NewInVT)) { + if (TLI.isTypeLegal(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { else NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, &Ops[0], NewNumElts); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeSynthesizable(InWidenVT)) { + if (TLI.isTypeLegal(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT InVT = InOp.getValueType(); - ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx); - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - // Check if we can just return the input vector after widening. - if (IdxVal == 0 && InVT == WidenVT) - return InOp; - - // Check if we can extract from the vector. - unsigned InNumElts = InVT.getVectorNumElements(); - if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); - } + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. 
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; - if (CIdx) { - unsigned IdxVal = CIdx->getZExtValue(); - for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, IdxVT)); - } else { - Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx); - for (i=1; i < NumElts; ++i) { - SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, - DAG.getConstant(i, IdxVT)); - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx); - } - } + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(IdxVal+i)); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to widen this operator's operand!"); - case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; @@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } -SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); @@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); unsigned Size = VT.getSizeInBits(); - if (InWidenSize % Size == 0 && !VT.isVector()) { + // x86mmx is not an acceptable vector element type, so don't try. 
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeSynthesizable(NewVT)) { - SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); } @@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store + // See if there is larger legal integer than the element type to load/store unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, if (NewLdTy != LdTy) { NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); - VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); // Readjust position and vector position based on new load type Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; @@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], DAG.getIntPtrConstant(Idx++)); } - return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp); + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD) { +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, + LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. 
// The routines chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector @@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); @@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); - return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } if (NewVT == WidenVT) return LdOp; @@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, NewVTWidth = NewVT.getSizeInBits(); } - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, - SVOffset+Offset, isVolatile, + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); @@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); - + // If the load contains vectors, build the vector using concat vector. // All of the vectors used to loads are power of 2 and the scalars load // can be combined to make a power of 2 vector. 
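To make the load-widening strategy above concrete: suppose a 96-bit vector load must be widened to a 128-bit result and the widest legal pieces FindMemType can return are 64 and 32 bits. In this hypothetical case (the widths are illustrative, not taken from the patch), GenWidenVectorLoads would emit two chained loads whose pointer info is offset-derived, roughly:

    // First piece: 64 bits at offset 0, described by the node's own info.
    SDValue Ld0 = DAG.getLoad(MVT::i64, dl, Chain, BasePtr, LD->getPointerInfo(),
                              isVolatile, isNonTemporal, Align);

    // Second piece: 32 bits at byte offset 8; both the address and the
    // MachinePointerInfo are advanced by the same amount.
    SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                               DAG.getIntPtrConstant(8));
    SDValue Ld1 = DAG.getLoad(MVT::i32, dl, Chain, Ptr8,
                              LD->getPointerInfo().getWithOffset(8),
                              isVolatile, isNonTemporal, MinAlign(Align, 8));

    // LdChain collects Ld0.getValue(1) and Ld1.getValue(1) so the pieces stay
    // ordered, and the loaded values are then recombined into the widened vector.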
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const Value *SV = LD->getSrcValue(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, - isNonTemporal, Align); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); } @@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); @@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getIntPtrConstant(Idx)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; @@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, // Cast the vector to the scalar type we can store unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - isNonTemporal, MinAlign(Align, Offset))); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, // and then store it. 
Instead, we extract each element and then store it. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - const Value *SV = ST->getSrcValue(); - int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); - + EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); @@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, - SVOffset, StEltVT, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { @@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, BasePtr, DAG.getIntPtrConstant(Offset)); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getIntPtrConstant(0)); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, - SVOffset + Offset, StEltVT, - isVolatile, isNonTemporal, + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, + ST->getPointerInfo().getWithOffset(Offset), + StEltVT, isVolatile, isNonTemporal, MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ac2d33884b26..2dcb22957325 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/System/DataTypes.h" +#include "llvm/Support/DataTypes.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index fae27294e364..e3da2084529a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) + if (SU->getNode()->getGluedNode()) return NULL; SDNode *N = SU->getNode(); @@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } } - for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. 
unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 56f5ded50083..430283d5eff9 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls"); static RegisterScheduler tdListDAGScheduler("list-td", "Top-down list scheduler", createTDListDAGScheduler); - + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGList - The actual list scheduler implementation. This supports @@ -51,7 +51,7 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. /// SchedulingPriorityQueue *AvailableQueue; - + /// PendingQueue - This contains all of the instructions whose operands have /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands become available, the instruction is @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -87,14 +88,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); - + // Build the scheduling graph. BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); - + ListScheduleTopDown(); - + AvailableQueue->releaseState(); } @@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); - + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - + Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); @@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. std::vector<SUnit*> NotReady; @@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() { assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); } } - + // If there are no instructions available, don't try to issue anything, and // don't advance the hazard recognizer. 
if (AvailableQueue->empty()) { @@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() { } SUnit *FoundSUnit = 0; - + bool HasNoopHazards = false; while (!AvailableQueue->empty()) { SUnit *CurSUnit = AvailableQueue->pop(); - + ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; } - + // Remember if this is a noop hazard. HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; - + NotReady.push_back(CurSUnit); } - + // Add the nodes that aren't ready back onto the available list. if (!NotReady.empty()) { AvailableQueue->push_all(NotReady); @@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // If this is a pseudo-op node, we don't want to increment the current // cycle. if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; + ++CurCycle; } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 4c3e4e3b0768..0b548b277f4c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,10 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,31 +88,56 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// CurCycle - The current scheduler state corresponds to this cycle. + unsigned CurCycle; + + /// MinAvailableCycle - Cycle of the soonest available instruction. 
+ unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; std::vector<SUnit*> LiveRegDefs; - std::vector<unsigned> LiveRegCycles; + std::vector<SUnit*> LiveRegGens; /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (DisableSchedCycles || !NeedLatency) + HazardRec = new ScheduleHazardRecognizer(); + else + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } void Schedule(); + ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } + /// IsReachable - Checks if SU is reachable from TargetSU. bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { return Topo.IsReachable(SU, TargetSU); @@ -136,24 +166,37 @@ public: } private: + bool isReady(SUnit *SU) { + return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + void ReleasePred(SUnit *SU, const SDep *PredEdge); - void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ReleasePredecessors(SUnit *SU); void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); + void ScheduleNodeBottomUp(SUnit*); void CapturePred(SDep *PredEdge); - void ScheduleNodeBottomUp(SUnit*, unsigned); - void ScheduleNodeTopDown(SUnit*, unsigned); void UnscheduleNodeBottomUp(SUnit*); - void BacktrackBottomUp(SUnit*, unsigned, unsigned&); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); - void ListScheduleTopDown(); + + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); + void ScheduleNodeTopDown(SUnit*); + void ListScheduleTopDown(); + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. @@ -190,11 +233,13 @@ private: void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() - << " **********\n"); + << " '" << BB->getName() << "' **********\n"); + CurCycle = 0; + MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegCycles.resize(TRI->getNumRegs(), 0); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegGens.resize(TRI->getNumRegs(), NULL); // Build the scheduling graph. 
BuildSchedGraph(NULL); @@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() { Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); - + + HazardRec->Reset(); + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. if (isBottomUp) ListScheduleBottomUp(); else ListScheduleTopDown(); - + AvailableQueue->releaseState(); } @@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { PredSU->isAvailable = true; - AvailableQueue->push(PredSU); + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(SU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } } } -void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { // Bottom up: release predecessors for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - if (!LiveRegDefs[I->getReg()]) { + SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + "interference on register dependence"); + LiveRegDefs[I->getReg()] = I->getSUnit(); + if (!LiveRegGens[I->getReg()]) { ++NumLiveRegs; - LiveRegDefs[I->getReg()] = I->getSUnit(); - LiveRegCycles[I->getReg()] = CurCycle; + LiveRegGens[I->getReg()] = SU; } } } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? 
PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + if (!HazardRec->isEnabled()) + return; + + // Check for phys reg copy. + if (!SU->getNode()) + return; + + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. 
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); #endif - // FIXME: Handle noop hazard. + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node it's ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled intruction. + EmitNode(SU); + Sequence.push_back(SU); AvailableQueue->ScheduledNode(SU); - ReleasePredecessors(SU, CurCycle); + // Update liveness of predecessors before successors to avoid treating a + // two-address node as a live range def. + ReleasePredecessors(SU); // Release all the implicit physical register defs that are live. for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { - assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == SU && - "Physical register dependency violated?"); - --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegCycles[I->getReg()] = 0; - } + // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegGens[I->getReg()] = NULL; } } SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If HazardRec is disabled, count each inst as one cycle. + if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() + || AvailableQueue->empty()) + AdvanceToCycle(CurCycle + 1); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being /// unscheduled, incrcease the succ left count of its predecessors. Remove /// them from AvailableQueue if necessary. -void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { +void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); if (PredSU->isAvailable) { PredSU->isAvailable = false; @@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ + if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; - LiveRegCycles[I->getReg()] = 0; + LiveRegGens[I->getReg()] = NULL; } } for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { + // This becomes the nearest def. Note that an earlier def may still be + // pending if this is a two-address node. 
+ LiveRegDefs[I->getReg()] = SU; if (!LiveRegDefs[I->getReg()]) { - LiveRegDefs[I->getReg()] = SU; ++NumLiveRegs; } - if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) - LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); + if (LiveRegGens[I->getReg()] == NULL || + I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) + LiveRegGens[I->getReg()] = I->getSUnit(); } } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - AvailableQueue->push(SU); + if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } AvailableQueue->UnscheduledNode(SU); } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. -void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, - unsigned &CurCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + ReleasePending(); + ++NumBacktracks; } static bool isOperandOf(const SUnit *SU, SDNode *N) { for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (SUNode->isOperandOf(N)) return true; } @@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. 
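BacktrackBottomUp now unwinds the emitted Sequence node by node until it reaches the target BtSU, and RestoreHazardCheckerBottomUp afterwards rebuilds the hazard recognizer by replaying the tail of the sequence. The unwind loop, reduced to a standalone shape with placeholder Unit and unschedule():

    #include <cassert>
    #include <vector>

    struct Unit { bool Scheduled = true; };

    void unschedule(Unit *U) { U->Scheduled = false; } // stand-in side effect

    void backtrackTo(std::vector<Unit*> &Sequence, Unit *BtSU) {
      while (true) {
        assert(!Sequence.empty() && "backtrack target was not in the sequence");
        Unit *OldSU = Sequence.back();
        Sequence.pop_back();
        unschedule(OldSU); // undo liveness/pressure updates for OldSU
        if (OldSU == BtSU)
          break;           // BtSU itself is now unscheduled and can be reordered
      }
    }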
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { - if (SU->getNode()->getFlaggedNode()) - return NULL; - SDNode *N = SU->getNode(); if (!N) return NULL; + if (SU->getNode()->getGluedNode()) + return NULL; + SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; else if (VT == MVT::Other) TryUnfold = true; @@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return NULL; } @@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } else { LoadSU = CreateNewSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); ComputeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != TID.getNumOperands(); ++i) { if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { @@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } if (TID.isCommutable()) NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); ComputeLatency(NewSU); // Record all the edges to and from the old SU, by category. @@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { RemovePred(SuccDep, D); D.setSUnit(NewSU); AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; } for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { SDep D = ChainSuccs[i]; @@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } // Add a data dependency to reflect that NewSU reads the value defined // by LoadSU. @@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. -static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector<SUnit*> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - bool Added = false; - if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { - if (RegAdded.insert(Reg)) { + for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + + // Check if Ref is live. + if (!LiveRegDefs[Reg]) continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[Reg] == SU) continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(Reg)) LRegs.push_back(Reg); - Added = true; - } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) - if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { - if (RegAdded.insert(*Alias)) { - LRegs.push_back(*Alias); - Added = true; - } - } - return Added; } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. 
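The rewritten CheckForLiveRegDef walks TRI->getOverlaps(Reg), i.e. the register together with every register aliasing it, when collecting interfering live registers. A self-contained sketch of that interference test with a made-up overlap table standing in for TargetRegisterInfo:

    #include <set>
    #include <vector>

    struct Unit {};

    // Overlaps[R] lists R itself followed by every register aliasing R,
    // e.g. a 16-bit register together with its 8-bit and 32-bit forms.
    using OverlapTable = std::vector<std::vector<unsigned>>;

    void checkForLiveRegDef(const Unit *SU, unsigned Reg,
                            const std::vector<const Unit*> &LiveRegDefs,
                            const OverlapTable &Overlaps,
                            std::set<unsigned> &RegAdded,
                            std::vector<unsigned> &LRegs) {
      for (unsigned R : Overlaps[Reg]) {
        if (!LiveRegDefs[R]) continue;      // not live: no interference
        if (LiveRegDefs[R] == SU) continue; // reusing our own def is fine
        if (RegAdded.insert(R).second)      // record each interfering reg once
          LRegs.push_back(R);
      }
    }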
/// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. -bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector<unsigned, 4> &LRegs){ +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { if (NumLiveRegs == 0) return false; SmallSet<unsigned, 4> RegAdded; // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isAssignedRegDep()) + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, RegAdded, LRegs, TRI); } - for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { if (Node->getOpcode() == ISD::INLINEASM) { // Inline asm can clobber physical defs. unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) - --NumOps; // Ignore the flag operand. + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } + return !LRegs.empty(); } +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { + SmallVector<SUnit*, 4> Interferences; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + + SUnit *CurSU = AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + CurSU = AvailableQueue->pop(); + } + if (CurSU) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + assert(Interferences[i]->isAvailable && "must still be available"); + AvailableQueue->push(Interferences[i]); + } + return CurSU; + } + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + SUnit *TrySU = Interferences[i]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. 
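The fast path of the new PickNodeToScheduleBottomUp pops candidates until one has no live-register interference, parking the rejected ones so they can be pushed back onto the queue afterwards. A generic stand-in for that loop; the node type and hasInterference predicate are placeholders for the example:

    #include <functional>
    #include <queue>
    #include <vector>

    template <typename T>
    T *pickWithoutInterference(std::priority_queue<T*> &Available,
                               const std::function<bool(T*)> &hasInterference) {
      std::vector<T*> Parked;
      T *Picked = nullptr;
      while (!Available.empty()) {
        T *Cand = Available.top();
        Available.pop();
        if (!hasInterference(Cand)) { Picked = Cand; break; }
        Parked.push_back(Cand); // delayed by a live physical register
      }
      // Whatever we parked is still schedulable later; put it back.
      for (T *SU : Parked)
        Available.push(SU);
      return Picked; // null: every candidate interferes (backtrack/copy path)
    }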
+ SUnit *BtSU = NULL; + unsigned LiveCycle = UINT_MAX; + for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { + unsigned Reg = LRegs[j]; + if (LiveRegGens[Reg]->getHeight() < LiveCycle) { + BtSU = LiveRegGens[Reg]; + LiveCycle = BtSU->getHeight(); + } + } + if (!WillCreateCycle(TrySU, BtSU)) { + BacktrackBottomUp(TrySU, BtSU); + + // Force the current node to be scheduled before the node that + // requires the physical reg dep. + if (BtSU->isAvailable) { + BtSU->isAvailable = false; + if (!BtSU->isPending) + AvailableQueue->remove(BtSU); + } + AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + + // If one or more successors has been unscheduled, then the current + // node is no longer avaialable. Schedule a successor that's now + // available instead. + if (!TrySU->isAvailable) { + CurSU = AvailableQueue->pop(); + } + else { + CurSU = TrySU; + TrySU->isPending = false; + Interferences.erase(Interferences.begin()+i); + } + break; + } + } + + if (!CurSU) { + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. + SUnit *TrySU = Interferences[0]; + SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + // May no longer be available due to backtracking. + if (Interferences[i]->isAvailable) { + AvailableQueue->push(Interferences[i]); + } + } + return CurSU; +} /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up /// schedulers. void ScheduleDAGRRList::ListScheduleBottomUp() { - unsigned CurCycle = 0; - // Release any predecessors of the special Exit node. 
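When every candidate interferes, the code first chooses a backtrack target: among the blocking registers, the live-range generator with the smallest height. Reduced to a helper with invented types (the LLVM version does not null-check the gen):

    #include <climits>
    #include <vector>

    struct Unit { unsigned Height; };

    Unit *pickBacktrackTarget(const std::vector<unsigned> &LRegs,
                              const std::vector<Unit*> &LiveRegGens,
                              unsigned &LiveCycle) {
      Unit *BtSU = nullptr;
      LiveCycle = UINT_MAX;
      for (unsigned Reg : LRegs) {
        Unit *Gen = LiveRegGens[Reg];
        if (Gen && Gen->Height < LiveCycle) {
          BtSU = Gen;
          LiveCycle = Gen->Height;
        }
      }
      return BtSU; // unschedule back to this node unless that would form a cycle
    }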
- ReleasePredecessors(&ExitSU, CurCycle); + ReleasePredecessors(&ExitSU); // Add root to Available queue. if (!SUnits.empty()) { @@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. - SmallVector<SUnit*, 4> NotReady; - DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - bool Delayed = false; - LRegsMap.clear(); - SUnit *CurSU = AvailableQueue->pop(); - while (CurSU) { - SmallVector<unsigned, 4> LRegs; - if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) - break; - Delayed = true; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - NotReady.push_back(CurSU); - CurSU = AvailableQueue->pop(); - } + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); - // All candidates are delayed due to live physical reg dependencies. - // Try backtracking, code duplication, or inserting cross class copies - // to resolve it. - if (Delayed && !CurSU) { - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - SUnit *TrySU = NotReady[i]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - - // Try unscheduling up to the point where it's safe to schedule - // this node. - unsigned LiveCycle = CurCycle; - for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { - unsigned Reg = LRegs[j]; - unsigned LCycle = LiveRegCycles[Reg]; - LiveCycle = std::min(LiveCycle, LCycle); - } - SUnit *OldSU = Sequence[LiveCycle]; - if (!WillCreateCycle(TrySU, OldSU)) { - BacktrackBottomUp(TrySU, LiveCycle, CurCycle); - // Force the current node to be scheduled before the node that - // requires the physical reg dep. - if (OldSU->isAvailable) { - OldSU->isAvailable = false; - AvailableQueue->remove(OldSU); - } - AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, /*isArtificial=*/true)); - // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) - CurSU = AvailableQueue->pop(); - else { - CurSU = TrySU; - TrySU->isPending = false; - NotReady.erase(NotReady.begin()+i); - } - break; - } - } + AdvancePastStalls(SU); - if (!CurSU) { - // Can't backtrack. If it's too expensive to copy the value, then try - // duplicate the nodes that produces these "too expensive to copy" - // values to break the dependency. In case even that doesn't work, - // insert cross class copies. - // If it's not too expensive, i.e. cost != -1, issue copies. - SUnit *TrySU = NotReady[0]; - SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; - assert(LRegs.size() == 1 && "Can't handle this yet!"); - unsigned Reg = LRegs[0]; - SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. 
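Put together, the rewritten ListScheduleBottomUp loop reduces to: pick a node under all constraints, advance the cycle past any stall, schedule it, and when nothing is available jump ahead to the earliest pending ready cycle. A compilable toy version of that control flow, with plain structs standing in for SUnits and no hazard recognizer:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct N { unsigned ReadyCycle; }; // stand-in node: just a ready cycle

    int main() {
      unsigned CurCycle = 0;
      std::vector<N> Available = {{0}, {1}};
      std::vector<N> Pending = {{4}};
      std::vector<N> Sequence;

      while (!Available.empty() || !Pending.empty()) {
        if (Available.empty()) {
          // Nothing can issue: skip ahead to the earliest pending ready cycle,
          // mirroring the MinAvailableCycle jump in the patch.
          unsigned MinReady = Pending.front().ReadyCycle;
          for (const N &P : Pending) MinReady = std::min(MinReady, P.ReadyCycle);
          CurCycle = std::max(CurCycle + 1, MinReady);
          for (unsigned i = 0; i < Pending.size(); /* advanced in body */) {
            if (Pending[i].ReadyCycle <= CurCycle) {
              Available.push_back(Pending[i]);
              Pending[i] = Pending.back();
              Pending.pop_back();
            } else ++i;
          }
          continue;
        }
        // Pick a node (here simply the smallest ready cycle) and make sure the
        // cycle covers its latency before issuing it.
        auto Best = std::min_element(Available.begin(), Available.end(),
                                     [](const N &A, const N &B) {
                                       return A.ReadyCycle < B.ReadyCycle;
                                     });
        N SU = *Best;
        Available.erase(Best);
        CurCycle = std::max(CurCycle, SU.ReadyCycle); // AdvancePastStalls analogue
        Sequence.push_back(SU);
      }
      std::printf("scheduled %zu nodes in %u cycles\n", Sequence.size(), CurCycle);
      return 0;
    }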
- SUnit *NewDef = 0; - if (DestRC) - NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; - if (!NewDef) { - // Issue copies, these can be expensive cross register class copies. - SmallVector<SUnit*, 2> Copies; - InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); - AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - NewDef = Copies.back(); - } + ScheduleNodeBottomUp(SU); - DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); - LiveRegDefs[Reg] = NewDef; - AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, - /*Reg=*/0, /*isNormalMemory=*/false, - /*isMustAlias=*/false, - /*isArtificial=*/true)); - TrySU->isAvailable = false; - CurSU = NewDef; - } - - assert(CurSU && "Unable to resolve live physical register dependencies!"); - } - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { - NotReady[i]->isPending = false; - // May no longer be available due to backtracking. - if (NotReady[i]->isAvailable) - AvailableQueue->push(NotReady[i]); + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } - NotReady.clear(); - - if (CurSU) - ScheduleNodeBottomUp(CurSU, CurCycle); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); } // Reverse the order if it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. -void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); @@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { - unsigned CurCycle = 0; AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. @@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() { SUnits[i].isAvailable = true; } } - + // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. 
Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { SUnit *CurSU = AvailableQueue->pop(); - + if (CurSU) - ScheduleNodeTopDown(CurSU, CurCycle); + ScheduleNodeTopDown(CurSU); ++CurCycle; AvailableQueue->setCurCycle(CurCycle); } - + #ifndef NDEBUG VerifySchedule(isBottomUp); #endif @@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() { //===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Implementation +// RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// // // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers // to reduce register pressure. -// +// namespace { - template<class SF> - class RegReductionPriorityQueue; - - /// bu_ls_rr_sort - Priority function for bottom up register pressure - // reduction scheduler. - struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; - bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} - bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; +class RegReductionPQBase; + +struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // td_ls_rr_sort - Priority function for top down register pressure reduction - // scheduler. - struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; - td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// td_ls_rr_sort - Priority function for top down register pressure reduction +// scheduler. +struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false }; - // src_ls_rr_sort - Priority function for source order scheduler. - struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; - src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) - : SPQ(spq) {} - src_ls_rr_sort(const src_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; + RegReductionPQBase *SPQ; + td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false }; - // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; - hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) - : SPQ(spq) {} - hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + src_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + src_ls_rr_sort(const src_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; - bool operator()(const SUnit* left, const SUnit* right) const; +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; - // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) - // scheduler. - struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; - ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) - : SPQ(spq) {} - ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) - : SPQ(RHS.SPQ) {} + RegReductionPQBase *SPQ; + hybrid_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; - bool operator()(const SUnit* left, const SUnit* right) const; + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true }; -} // end anonymous namespace + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit*> Queue; + unsigned CurQueueId; + bool TracksRegPressure; + + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
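RegPressure and RegLimit are indexed by register class ID; pressure counts as high when keeping one more value of a class live would reach the number of allocatable registers in that class. A tiny standalone version of that check, with made-up class IDs and costs:

    #include <vector>

    bool wouldExceedLimit(const std::vector<unsigned> &RegPressure,
                          const std::vector<unsigned> &RegLimit,
                          unsigned RCId, unsigned Cost) {
      return RegPressure[RCId] + Cost >= RegLimit[RCId];
    }

    int main() {
      std::vector<unsigned> Pressure = {3, 1};  // values live per class
      std::vector<unsigned> Limit    = {4, 8};  // allocatable regs per class
      // Keeping one more value of class 0 live would hit its limit here.
      return wouldExceedLimit(Pressure, Limit, /*RCId=*/0, /*Cost=*/1) ? 0 : 1;
    }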
+ std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits); + + void addNode(const SUnit *SU); + + void updateNode(const SUnit *SU); + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU); + + void ScheduledNode(SUnit *SU); + + void UnscheduledNode(SUnit *SU); + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() { + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); + V->NodeQueueId = 0; + return V; + } + + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. 
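popFromQueue keeps the queue as a plain vector: scan for the best entry under the picker, swap it to the back, and pop. The scan is O(n) per pop, but it re-evaluates the comparator every time, which matters when node priorities change as scheduling progresses. A standalone version over an int vector, with the same "true means the second argument is better" convention:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <utility>
    #include <vector>

    template <class Compare>
    int popBest(std::vector<int> &Q, Compare Better) {
      assert(!Q.empty() && "popping from an empty queue");
      auto Best = Q.begin();
      for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
        if (Better(*Best, *I)) // true: *I should be preferred over *Best
          Best = I;
      int V = *Best;
      if (Best != std::prev(Q.end()))
        std::swap(*Best, Q.back());
      Q.pop_back();
      return V;
    }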
+ std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + if (isBottomUp()) + dbgs() << "Height " << SU->getHeight() << ": "; + else + dbgs() << "Depth " << SU->getDepth() << ": "; + SU->dump(DAG); + } + } +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<td_ls_rr_sort> +TDRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. @@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { if (SethiUllmanNumber == 0) SethiUllmanNumber = 1; - + return SethiUllmanNumber; } -namespace { - template<class SF> - class RegReductionPriorityQueue : public SchedulingPriorityQueue { - std::vector<SUnit*> Queue; - SF Picker; - unsigned CurQueueId; - bool TracksRegPressure; - - protected: - // SUnits - The SUnits for the current graph. - std::vector<SUnit> *SUnits; - - MachineFunction &MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; - - // SethiUllmanNumbers - The SethiUllman number for each node. - std::vector<unsigned> SethiUllmanNumbers; - - /// RegPressure - Tracking current reg pressure per register class. - /// - std::vector<unsigned> RegPressure; - - /// RegLimit - Tracking the number of allocatable registers per register - /// class. - std::vector<unsigned> RegLimit; - - public: - RegReductionPriorityQueue(MachineFunction &mf, - bool tracksrp, - const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, - const TargetLowering *tli) - : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { - if (TracksRegPressure) { - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); - } - } - - void initNodes(std::vector<SUnit> &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); - } - - void addNode(const SUnit *SU) { - unsigned SUSize = SethiUllmanNumbers.size(); - if (SUnits->size() > SUSize) - SethiUllmanNumbers.resize(SUSize*2, 0); - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } - - void updateNode(const SUnit *SU) { - SethiUllmanNumbers[SU->NodeNum] = 0; - CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); - } +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. 
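CalcNodeSethiUllmanNumber is a DAG adaptation of the classic Sethi-Ullman labeling, which estimates how many registers a subtree needs to evaluate without spilling. The tree form below is only the textbook version of the idea, not the code in this patch:

    #include <algorithm>
    #include <cstdio>
    #include <memory>

    struct Expr {
      std::unique_ptr<Expr> L, R; // both null for leaves
    };

    unsigned sethiUllman(const Expr *E) {
      if (!E->L && !E->R)
        return 1; // a leaf needs one register to hold its value
      unsigned LN = E->L ? sethiUllman(E->L.get()) : 0;
      unsigned RN = E->R ? sethiUllman(E->R.get()) : 0;
      // If both children need the same amount, one result must stay live while
      // the other side is evaluated, so one extra register is required.
      return LN == RN ? LN + 1 : std::max(LN, RN);
    }

    int main() {
      auto leaf = [] { return std::make_unique<Expr>(); };
      auto node = [](std::unique_ptr<Expr> L, std::unique_ptr<Expr> R) {
        auto N = std::make_unique<Expr>();
        N->L = std::move(L);
        N->R = std::move(R);
        return N;
      };
      // (a + b) * (c + d)
      auto Root = node(node(leaf(), leaf()), node(leaf(), leaf()));
      std::printf("registers needed: %u\n", sethiUllman(Root.get())); // prints 3
      return 0;
    }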
+void RegReductionPQBase::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); - void releaseState() { - SUnits = 0; - SethiUllmanNumbers.clear(); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - } + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} - unsigned getNodePriority(const SUnit *SU) const { - assert(SU->NodeNum < SethiUllmanNumbers.size()); - unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; - if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) - // CopyToReg should be close to its uses to facilitate coalescing and - // avoid spilling. - return 0; - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::INSERT_SUBREG) - // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be - // close to their uses to facilitate coalescing. - return 0; - if (SU->NumSuccs == 0 && SU->NumPreds != 0) - // If SU does not have a register use, i.e. it doesn't produce a value - // that would be consumed (e.g. store), then it terminates a chain of - // computation. Give it a large SethiUllman number so it will be - // scheduled right before its predecessors that it doesn't lengthen - // their live ranges. - return 0xffff; - if (SU->NumPreds == 0 && SU->NumSuccs != 0) - // If SU does not have a register def, schedule it close to its uses - // because it does not lengthen any live ranges. - return 0; - return SethiUllmanNumbers[SU->NodeNum]; - } +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); +} - unsigned getNodeOrdering(const SUnit *SU) const { - return scheduleDAG->DAG->GetOrdering(SU->getNode()); - } +void RegReductionPQBase::addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - bool empty() const { return Queue.empty(); } - - void push(SUnit *U) { - assert(!U->NodeQueueId && "Node in the queue already"); - U->NodeQueueId = ++CurQueueId; - Queue.push_back(U); - } +void RegReductionPQBase::updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} - SUnit *pop() { - if (empty()) return NULL; - std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), - E = Queue.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Queue.end())) - std::swap(*Best, Queue.back()); - Queue.pop_back(); - V->NodeQueueId = 0; - return V; - } +// Lower priority means schedule further down. For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. 
+ return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return 0; + return SethiUllmanNumbers[SU->NodeNum]; +} - void remove(SUnit *SU) { - assert(!Queue.empty() && "Queue is empty!"); - assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), - SU); - if (I != prior(Queue.end())) - std::swap(*I, Queue.back()); - Queue.pop_back(); - SU->NodeQueueId = 0; - } +//===----------------------------------------------------------------------===// +// Register Pressure Tracking +//===----------------------------------------------------------------------===// - bool HighRegPressure(const SUnit *SU) const { - if (!TLI) - return false; +void RegReductionPQBase::dumpRegPressure() const { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] + << '\n'); + } +} - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] >= RegLimit[RCId]) - return true; // Reg pressure already high. 
- unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (!PN->hasAnyUseOfValue(i)) - continue; - // Check if this increases register pressure of the specific register - // class to the point where it would cause spills. - if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) - return true; - } - } +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; - return false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; } + } + return false; +} - void ScheduledNode(SUnit *SU) { - if (!TracksRegPressure) - return; - - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { + const SDNode *N = SU->getNode(); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - } + if (!N->isMachineOpcode() || !SU->NumSuccs) + return false; - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. 
- if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - } - } + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; + } + return false; +} + +void RegReductionPQBase::ScheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; - dumpRegPressure(); + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + // FIXME: The ScheduleDAG currently loses information about which of a + // node's values is consumed by each dependence. Consequently, if the node + // defines multiple register classes, we don't know which to pressurize + // here. Instead the following loop consumes the register defs in an + // arbitrary order. At least it handles the common case of clustered loads + // to the same class. For precise liveness, each SDep needs to indicate the + // result number. But that tightly couples the ScheduleDAG with the + // SelectionDAG making updates tricky. A simpler hack would be to attach a + // value type or register class to SDep. + // + // The most important aspect of register tracking is balancing the increase + // here with the reduction further below. Note that this SU may use multiple + // defs in PredSU. The can't be determined here, but we've already + // compensated by reducing NumRegDefsLeft in PredSU during + // ScheduleDAGSDNodes::AddSchedEdges. + --PredSU->NumRegDefsLeft; + unsigned SkipRegDefs = PredSU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + break; } + } - void UnscheduledNode(SUnit *SU) { - if (!TracksRegPressure) - return; - - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) { - if (N->getOpcode() != ISD::CopyToReg) - return; - } else { - unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::REG_SEQUENCE || - Opc == TargetOpcode::IMPLICIT_DEF) - return; - } + // We should have this assert, but there may be dead SDNodes that never + // materialize as SUnits, so they don't appear to generate liveness. 
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + dumpRegPressure(); +} - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - continue; - SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs) - continue; - const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) { - if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } - continue; - } - unsigned POpc = PN->getMachineOpcode(); - if (POpc == TargetOpcode::IMPLICIT_DEF) - continue; - if (POpc == TargetOpcode::EXTRACT_SUBREG) { - EVT VT = PN->getOperand(0).getValueType(); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } else if (POpc == TargetOpcode::INSERT_SUBREG || - POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - continue; - } - unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); - if (!PN->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) - // Register pressure tracking is imprecise. This can happen. - RegPressure[RCId] = 0; - else - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - } - } +void RegReductionPQBase::UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } - // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() - // may transfer data dependencies to CopyToReg. - if (SU->NumSuccs && N->isMachineOpcode()) { - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) - continue; - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); - } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumSuccsLeft counts all deps. 
Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } - - dumpRegPressure(); + continue; } - - void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { - scheduleDAG = scheduleDag; + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; } - - void dumpRegPressure() const { - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) { - const TargetRegisterClass *RC = *I; - unsigned Id = RC->getID(); - unsigned RP = RegPressure[Id]; - if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); - } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } + } - protected: - bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(); - void PrescheduleNodesWithMultipleUses(); - void CalculateSethiUllmanNumbers(); - }; - - typedef RegReductionPriorityQueue<bu_ls_rr_sort> - BURegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<td_ls_rr_sort> - TDRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<src_ls_rr_sort> - SrcRegReductionPriorityQueue; - - typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> - HybridBURRPriorityQueue; + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } - typedef RegReductionPriorityQueue<ilp_ls_rr_sort> - ILPBURRPriorityQueue; + dumpRegPressure(); } +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + /// closestSucc - Returns the scheduled cycle of the successor which is /// closest to the current cycle. 
static unsigned closestSucc(const SUnit *SU) { @@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -template <typename RRSort> -static bool BURRSort(const SUnit *left, const SUnit *right, - const RegReductionPriorityQueue<RRSort> *SPQ) { +/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// CopyToReg to a virtual register. This SU def is probably a liveout and +/// it has no other use. It should be scheduled closer to the terminator. +static bool hasOnlyLiveOutUses(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { + unsigned Reg = + cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// UnitsSharePred - Return true if the two scheduling units share a common +/// data predecessor. +static bool UnitsSharePred(const SUnit *left, const SUnit *right) { + SmallSet<const SUnit*, 4> Preds; + for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + Preds.insert(I->getSUnit()); + } + for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (Preds.count(I->getSUnit())) + return true; + } + return false; +} + +// Check for either a dependence (latency) or resource (hazard) stall. +// +// Note: The ScheduleHazardRecognizer interface requires a non-const SU. +static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { + if ((int)SPQ->getCurCycle() < Height) return true; + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return true; + return false; +} + +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if latency-based priority is equivalent. +static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, + RegReductionPQBase *SPQ) { + // If the two nodes share an operand and one of them has a single + // use that is a live out copy, favor the one that is live out. Otherwise + // it will be difficult to eliminate the copy if the instruction is a + // loop induction variable update. e.g. + // BB: + // sub r1, r3, #1 + // str r0, [r2, r3] + // mov r3, r1 + // cmp + // bne BB + bool SharePred = UnitsSharePred(left, right); + // FIXME: Only adjust if BB is a loop back edge. + // FIXME: What's the cost of a copy? + int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; + int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; + int LHeight = (int)left->getHeight() - LBonus; + int RHeight = (int)right->getHeight() - RBonus; + + bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + BUHasStall(left, LHeight, SPQ); + bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + BUHasStall(right, RHeight, SPQ); + + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + if (LStall) { + if (!RStall) + return 1; + if (LHeight != RHeight) + return LHeight > RHeight ? 
1 : -1; + } else if (RStall) + return -1; + + // If either node is scheduling for latency, sort them by height/depth + // and latency. + if (!checkPref || (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency)) { + if (DisableSchedCycles) { + if (LHeight != RHeight) + return LHeight > RHeight ? 1 : -1; + } + else { + // If neither instruction stalls (!LStall && !RStall) then + // it's height is already covered so only its depth matters. We also reach + // this if both stall but have the same height. + unsigned LDepth = left->getDepth(); + unsigned RDepth = right->getDepth(); + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; + } + } + if (left->Latency != right->Latency) + return left->Latency > right->Latency ? 1 : -1; + } + return 0; +} + +static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); if (LPriority != RPriority) @@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right, if (LScratch != RScratch) return LScratch > RScratch; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); + if (!DisableSchedCycles) { + int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + else { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); - assert(left->NodeQueueId && right->NodeQueueId && + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + } + + assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up -bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. -bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { return BURRSort(left, right, SPQ); } -bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. 
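BUCompareLatency folds the stall, height, depth and latency checks into a single three-way result so BURRSort can fall through to its other tie-breakers when the comparison is neutral. The standalone sketch below shows only that shape; the fields are invented and the preference directions are chosen for the example rather than reproducing the patch's exact heuristics:

    struct Cand {
      bool Stalled;    // would stall if issued at the current cycle
      unsigned Height; // distance to the end of the region
      unsigned Depth;  // distance from the start of the region
      unsigned Latency;
    };

    // Returns -1 if Left should go first, 1 for Right, 0 if equivalent so the
    // caller can apply its remaining (e.g. register pressure) tie-breakers.
    int compareLatency(const Cand &Left, const Cand &Right) {
      if (Left.Stalled != Right.Stalled)
        return Left.Stalled ? 1 : -1;     // prefer the candidate that won't stall
      if (Left.Height != Right.Height)
        return Left.Height > Right.Height ? -1 : 1;
      if (Left.Depth != Right.Depth)
        return Left.Depth < Right.Depth ? -1 : 1;
      if (Left.Latency != Right.Latency)
        return Left.Latency > Right.Latency ? -1 : 1;
      return 0;
    }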
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. - if (LHigh && !RHigh) + if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); return true; - else if (!LHigh && RHigh) + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); return false; + } else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule for latency if possible. - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort - // them according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) - return false; - - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; - } + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; } - return BURRSort(left, right, SPQ); } -bool ilp_ls_rr_sort::operator()(const SUnit *left, - const SUnit *right) const { +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return SU->getHeight() <= CurCycle; +} + +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. 
If register pressure is high, schedule for @@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left, return BURRSort(left, right, SPQ); } -template<class SF> -bool -RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { +//===----------------------------------------------------------------------===// +// Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { if (SU->isTwoAddress) { unsigned Opc = SU->getNode()->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { return false; } -/// hasCopyToRegUse - Return true if SU has a value successor that is a -/// CopyToReg node. -static bool hasCopyToRegUse(const SUnit *SU) { - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *SuccSU = I->getSUnit(); - if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) - return true; - } - return false; -} - /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's /// physical register defs. static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, @@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getFlaggedNode()) { + SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; const unsigned *SUImpDefs = @@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); - if (VT == MVT::Flag || VT == MVT::Other) + if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) continue; @@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// after N, which shortens the U->N live range, reducing /// register pressure. /// -template<class SF> -void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; @@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { if (PredSU->NumSuccs == 1) continue; // Avoid prescheduling to copies from virtual registers, which don't behave - // like other nodes from the perspective of scheduling // heuristics. + // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU->getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister @@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. 
-template<class SF> -void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) continue; SDNode *Node = SU->getNode(); - if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) continue; + bool isLiveOut = hasOnlyLiveOutUses(SU); unsigned Opc = Node->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); unsigned NumRes = TID.getNumDefs(); @@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; if ((!canClobber(SuccSU, DUSU) || - (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" @@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { } } -/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all -/// scheduling units. -template<class SF> -void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { - SethiUllmanNumbers.assign(SUnits->size(), 0); - - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); -} - /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled /// predecessors of the successors of the SUnit SU. Stop when the provided /// limit is exceeded. -static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, unsigned Limit) { unsigned Sum = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); @@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->NumSuccsLeft != right->NumSuccsLeft) return left->NumSuccsLeft > right->NumSuccsLeft; - assert(left->NodeQueueId && right->NodeQueueId && + assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); return (left->NodeQueueId > right->NodeQueueId); } @@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { //===----------------------------------------------------------------------===// llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new 
ScheduleDAGRRList(*IS->MF, false, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - + SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); const TargetLowering *TLI = &IS->getTargetLowering(); - + ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); - return SD; + return SD; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f1bf82ab145a..477c1ffe65d3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -34,8 +34,8 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf) { -} + : ScheduleDAG(mf), + InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { /// a specified operand is a physical register dependency. If so, returns the /// register and the cost of copying the register. 
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, - const TargetRegisterInfo *TRI, + const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) @@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, } } -static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, - SelectionDAG *DAG) { +static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { SmallVector<EVT, 4> VTs; - SDNode *FlagDestNode = Flag.getNode(); + SDNode *GlueDestNode = Glue.getNode(); - // Don't add a flag from a node to itself. - if (FlagDestNode == N) return; + // Don't add glue from a node to itself. + if (GlueDestNode == N) return; - // Don't add a flag to something which already has a flag. - if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + // Don't add glue to something which already has glue. + if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return; for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) VTs.push_back(N->getValueType(I)); - if (AddFlag) - VTs.push_back(MVT::Flag); + if (AddGlue) + VTs.push_back(MVT::Glue); SmallVector<SDValue, 4> Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); - if (FlagDestNode) - Ops.push_back(Flag); + if (GlueDestNode) + Ops.push_back(Glue); SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); MachineSDNode::mmo_iterator Begin = 0, End = 0; @@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, MN->setMemRefs(Begin, End); } -/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. +/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. /// This function finds loads of the same base and different offsets. If the -/// offsets are not far apart (target specific), it add MVT::Flag inputs and +/// offsets are not far apart (target specific), it add MVT::Glue inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { @@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (NumLoads == 0) return; - // Cluster loads by adding MVT::Flag outputs and inputs. This also + // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0, 0), true, DAG); + AddGlue(Lead, SDValue(0, 0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { - bool OutFlag = I < E - 1; + bool OutGlue = I < E - 1; SDNode *Load = Loads[I]; - AddFlags(Load, InFlag, OutFlag, DAG); + AddGlue(Load, InGlue, OutGlue, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues() - 1); + if (OutGlue) + InGlue = SDValue(Load, Load->getNumValues() - 1); ++LoadsClustered; } @@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // FIXME: Multiply by 2 because we may clone nodes during scheduling. // This is a temporary workaround. SUnits.reserve(NumNodes * 2); - + // Add all nodes in depth first order. 
SmallVector<SDNode*, 64> Worklist; SmallPtrSet<SDNode*, 64> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); - + while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); - + // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) if (Visited.insert(NI->getOperand(i).getNode())) Worklist.push_back(NI->getOperand(i).getNode()); - + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; - + // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - + SUnit *NodeSUnit = NewSUnit(NI); - - // See if anything is flagged to this node, if so, add them to flagged - // nodes. Nodes can have at most one flag input and one flag output. Flags - // are required to be the last operand and result of a node. - - // Scan up to find flagged preds. + + // See if anything is glued to this node, if so, add them to glued + // nodes. Nodes can have at most one glue input and one glue output. Glue + // is required to be the last operand and result of a node. + + // Scan up to find glued preds. SDNode *N = NI; while (N->getNumOperands() && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) { + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; } - - // Scan down to find any flagged succs. + + // Scan down to find any glued succs. N = NI; - while (N->getValueType(N->getNumValues()-1) == MVT::Flag) { - SDValue FlagVal(N, N->getNumValues()-1); - - // There are either zero or one users of the Flag result. - bool HasFlagUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { + SDValue GlueVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Glue result. + bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) - if (FlagVal.isOperandOf(*UI)) { - HasFlagUse = true; + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; break; } - if (!HasFlagUse) break; + if (!HasGlueUse) break; } - - // If there are flag operands involved, N is now the bottom-most node - // of the sequence of nodes that are flagged together. + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. // Update the SUnit. NodeSUnit->setNode(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + // Assign the Latency field of NodeSUnit using target-provided information. 
ComputeLatency(NodeSUnit); } @@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; SDNode *MainNode = SU->getNode(); - + if (MainNode->isMachineOpcode()) { unsigned Opc = MainNode->getMachineOpcode(); const TargetInstrDesc &TID = TII->get(Opc); @@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (TID.isCommutable()) SU->isCommutable = true; } - + // Find all predecessors and successors of the group. - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) { + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; @@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) SU->hasPhysRegDefs = true; } - + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); if (isPassiveNode(OpN)) continue; // Not scheduled. @@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (OpSU == SU) continue; // In the same group. EVT OpVT = N->getOperand(i).getValueType(); - assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; @@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } - SU->addPred(dep); + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + --OpSU->NumRegDefsLeft; + } } } } @@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents -/// flagged together nodes with a single SUnit. +/// glued together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); @@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { AddSchedEdges(); } +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. 
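[Editor's note - not part of the commit.] The RegDefIter added in this hunk uses a small "construct, then IsValid()/Advance()" iteration style instead of STL iterator pairs; InitNumRegDefsLeft below consumes it as "for (RegDefIter I(SU, this); I.IsValid(); I.Advance())". A toy, LLVM-free sketch of the same pattern follows; the class name and the skip-zero filter are invented purely for illustration (RegDefIter itself skips defs with no uses):

#include <cassert>
#include <vector>

// Minimal IsValid()/Advance() iterator that skips invalid (here: zero) entries.
class NonZeroIter {
  const std::vector<int> &Vals;
  size_t Idx;
public:
  explicit NonZeroIter(const std::vector<int> &V) : Vals(V), Idx(0) {
    while (Idx < Vals.size() && Vals[Idx] == 0)
      ++Idx; // find the first valid value, like the constructor's Advance()
  }
  bool IsValid() const { return Idx < Vals.size(); }
  int GetValue() const { assert(IsValid() && "bad iterator"); return Vals[Idx]; }
  void Advance() {
    do ++Idx; while (Idx < Vals.size() && Vals[Idx] == 0);
  }
};

int main() {
  std::vector<int> V = {0, 3, 0, 7, 0};
  int Count = 0, Sum = 0;
  for (NonZeroIter I(V); I.IsValid(); I.Advance()) {
    ++Count;
    Sum += I.GetValue();
  }
  assert(Count == 2 && Sum == 10);
  return 0;
}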
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. + for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) { + // Propagate the incoming (full-register) type. I doubt it's needed. + ValueType = Node->getOperand(0).getValueType(); + } + else { + ValueType = Node->getValueType(DefIdx); + } + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (Node == NULL) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { @@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { return; } - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) { + if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; return; } - + // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. + // all nodes glued together into this SUnit. SU->Latency = 0; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - if (N->isMachineOpcode()) { - SU->Latency += InstrItins. - getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); - } + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); } void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, @@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, if (ForceUnitLatencies()) return; - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) - return; - if (dep.getKind() != SDep::Data) return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode()) { - const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); - if (DefIdx >= II.getNumDefs()) - return; - int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); - if (DefCycle < 0) - return; - int UseCycle = 1; - if (Use->isMachineOpcode()) { - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); - } - if (UseCycle >= 0) { - int Latency = DefCycle - UseCycle + 1; - if (Latency >= 0) - dep.setLatency(Latency); - } + if (Use->isMachineOpcode()) + // Adjust the use operand index by num of defs. + OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); + int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + !BB->succ_empty()) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + // This copy is a liveout value. 
It is likely coalesced, so reduce the + // latency so not to penalize the def. + // FIXME: need target specific adjustment here? + Latency = (Latency > 1) ? Latency - 1 : 1; } + if (Latency >= 0) + dep.setLatency(Latency); } void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { @@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { SU->getNode()->dump(DAG); dbgs() << "\n"; - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { dbgs() << " "; - FlaggedNodes.back()->dump(DAG); + GluedNodes.back()->dump(DAG); dbgs() << "\n"; - FlaggedNodes.pop_back(); + GluedNodes.pop_back(); } } @@ -507,37 +573,25 @@ namespace { }; } -// ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule uses to determine how to insert dbg_value -// instructions in the right order. -static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, +/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, - SmallSet<unsigned, 8> &Seen) { - unsigned Order = DAG->GetOrdering(N); - if (!Order || !Seen.insert(Order)) - return; - - MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { - // Did not insert any instruction. - Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); - return; - } - - Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + DenseMap<SDValue, unsigned> &VRBaseMap, + unsigned Order) { if (!N->getHasDebugValue()) return; + // Opportunistically insert immediate dbg_value uses, i.e. those with source // order number right after the N. + MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N); for (unsigned i = 0, e = DVs.size(); i != e; ++i) { if (DVs[i]->isInvalidated()) continue; unsigned DVOrder = DVs[i]->getOrder(); - if (DVOrder == ++Order) { + if (!Order || DVOrder == ++Order) { MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); if (DbgMI) { Orders.push_back(std::make_pair(DVOrder, DbgMI)); @@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, } } +// ProcessSourceNode - Process nodes with source order numbers. These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, + InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = DAG->GetOrdering(N); + if (!Order || !Seen.insert(Order)) { + // Process any valid SDDbgValues even if node does not have any order + // assigned. + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); + return; + } + + MachineBasicBlock *BB = Emitter.getBlock(); + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + // Did not insert any instruction. 
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); + return; + } + + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); +} + /// EmitSchedule - Emit the machine code in scheduled order. MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { @@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // For pre-regalloc scheduling, create instructions corresponding to the - // SDNode and any flagged SDNodes and append them to the block. + // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. EmitPhysRegCopy(SU, CopyVRBaseMap); continue; } - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode()->getFlaggedNode(); N; - N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { - SDNode *N = FlaggedNodes.back(); - Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; + N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + SDNode *N = GluedNodes.back(); + Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); - FlaggedNodes.pop_back(); + GluedNodes.pop_back(); } Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap); @@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; -#ifndef NDEBUG - unsigned LastDIOrder = 0; -#endif for (; DI != DE && (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { -#ifndef NDEBUG - assert((*DI)->getOrder() >= LastDIOrder && - "SDDbgValue nodes must be in source order!"); - LastDIOrder = (*DI)->getOrder(); -#endif if ((*DI)->isInvalidated()) continue; MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 842fc8c72703..cc7310e4ca42 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -20,13 +20,13 @@ namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. - /// + /// /// Edges between SUnits are initially based on edges in the SelectionDAG, /// and additional edges can be added by the schedulers as heuristics. /// SDNodes such as Constants, Registers, and a few others that are not /// interesting to schedulers are not allocated SUnits. /// - /// SDNodes with MVT::Flag operands are grouped along with the flagged + /// SDNodes with MVT::Glue operands are grouped along with the flagged /// nodes into a single SUnit so that they are scheduled together. /// /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output @@ -36,6 +36,7 @@ namespace llvm { class ScheduleDAGSDNodes : public ScheduleDAG { public: SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; explicit ScheduleDAGSDNodes(MachineFunction &mf); @@ -72,13 +73,17 @@ namespace llvm { /// predecessors / successors info nor the temporary scheduling states. /// SUnit *Clone(SUnit *N); - + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. 
This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); @@ -105,6 +110,30 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + EVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + EVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ad06ebda5b00..2fb2f2d8aa1e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" @@ -44,7 +43,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/System/Mutex.h" +#include "llvm/Support/Mutex.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BIT_CONVERT) + if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) { if (N->getOperand(0).getOpcode() == ISD::UNDEF) return false; unsigned NumElems = N->getNumOperands(); + if (NumElems == 1) + return false; for (unsigned i = 1; i < NumElems; ++i) { SDValue V = N->getOperand(i); if (V.getOpcode() != ISD::UNDEF) @@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, /// doNotCSE - Return true if CSE should not be performed for this node. static bool doNotCSE(SDNode *N) { - if (N->getValueType(0) == MVT::Flag) + if (N->getValueType(0) == MVT::Glue) return true; // Never CSE anything that produces a flag. 
switch (N->getOpcode()) { @@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) { // Check that remaining values produced are not flags. for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) - if (N->getValueType(i) == MVT::Flag) + if (N->getValueType(i) == MVT::Glue) return true; // Never CSE anything that produces a flag. return false; @@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) { bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { - case ISD::EntryToken: - llvm_unreachable("EntryToken should not be in CSEMaps!"); - return false; case ISD::HANDLENODE: return false; // noop. case ISD::CONDCODE: assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && @@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { } default: // Remove it from the CSE Map. + assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); Erased = CSEMap.RemoveNode(N); break; } @@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { // Verify that the node was actually in one of the CSE maps, unless it has a // flag result (which cannot be CSE'd) or is one of the special cases that are // not subject to CSE. - if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); dbgs() << "\n"; @@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, return Node; } -/// VerifyNode - Sanity check the given node. Aborts if it is invalid. -void SelectionDAG::VerifyNode(SDNode *N) { +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. +static void VerifyNodeCommon(SDNode *N) { switch (N->getOpcode()) { default: break; @@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + // The SDNode allocators cannot be used to allocate nodes with fields that are + // not present in an SDNode! + assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); + assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); + assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); + assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); + assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); + assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); + assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); + assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); + assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); + assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); + assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); + assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); + assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); + assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); + assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); + assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); + assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); + assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); + assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); + + VerifyNodeCommon(N); +} + +/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is +/// invalid. 
+static void VerifyMachineNode(SDNode *N) { + // The MachineNode allocators cannot be used to allocate nodes with fields + // that are not present in a MachineNode! + // Currently there are no such nodes. + + VerifyNodeCommon(N); +} +#endif // NDEBUG + /// getEVTAlignment - Compute the default alignment value for the /// given type. /// @@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); ID.AddPointer(MD); - + void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clear(); + KnownOne.clearAllBits(); unsigned TrailZ = KnownZero.countTrailingOnes() + KnownZero2.countTrailingOnes(); unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + @@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, AllOnes, KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); - KnownOne2.clear(); - KnownZero2.clear(); + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); @@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // If the sign extended bits are demanded, we know that the sign // bit is demanded. 
- InSignBit.zext(BitWidth); + InSignBit = InSignBit.zext(BitWidth); if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; @@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::CTPOP: { unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); - KnownOne.clear(); + KnownOne.clearAllBits(); return; } case ISD::LOAD: { @@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; return; } @@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask; - InMask.trunc(InBits); + APInt InMask = Mask.trunc(InBits); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. Temporarily set this bit in the mask for our callee. if (NewBits.getBoolValue()) InMask |= InSignBit; - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. @@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // If the sign bit wasn't actually demanded by our caller, we don't // want it set in the KnownZero and KnownOne result values. Reset the // mask and reapply it to the result values. - InMask = Mask; - InMask.trunc(InBits); + InMask = Mask.trunc(InBits); KnownZero &= InMask; KnownOne &= InMask; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); // If the sign bit is known zero or one, the top bits match. 
if (SignBitKnownZero) @@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.trunc(InBits); - KnownZero.trunc(InBits); - KnownOne.trunc(InBits); + APInt InMask = Mask.trunc(InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); return; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask; - InMask.zext(InBits); - KnownZero.zext(InBits); - KnownOne.zext(InBits); + APInt InMask = Mask.zext(InBits); + KnownZero = KnownZero.zext(InBits); + KnownOne = KnownOne.zext(InBits); ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); break; } case ISD::AssertZext: { @@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } } // fall through - case ISD::ADD: { + case ISD::ADD: + case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. @@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + if (Op.getOpcode() == ISD::ADD) { + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + return; + } + + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. We + // then return to the caller that the low bit is unknown but that other bits + // are known zero. + if (KnownZeroOut >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); return; } case ISD::SREM: @@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); - KnownOne.clear(); + KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; return; } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + if (unsigned Align = InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + return; + } + break; + default: // Allow the target to implement this method for its nodes. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +/// isBaseWithConstantOffset - Return true if the specified operand is an +/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an +/// ISD::OR with a ConstantSDNode that is guaranteed to have the same +/// semantics as an ADD. This handles the equivalence: +/// X|Cst == X+Cst iff X&Cst = 0. 
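[Editor's note - not part of the commit.] A concrete instance of the identity the comment above relies on: when the constant only occupies bit positions known to be zero in the base (the MaskedValueIsZero check in the function that follows), OR and ADD produce the same value, so an aligned base OR'd with a small offset can be treated as base-plus-constant-offset addressing. A standalone check; the specific values are illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x1000; // e.g. an 8-byte-aligned address: low 3 bits are 0
  uint64_t Cst  = 0x4;    // offset that lives entirely in those known-zero bits
  assert((Base & Cst) == 0);          // the MaskedValueIsZero precondition
  assert((Base | Cst) == Base + Cst); // X|Cst == X+Cst when X&Cst == 0
  return 0;
}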
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { + if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || + !isa<ConstantSDNode>(Op.getOperand(1))) + return false; + + if (Op.getOpcode() == ISD::OR && + !MaskedValueIsZero(Op.getOperand(0), + cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) + return false; + + return true; +} + + bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. if (NoNaNsFPMath) @@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, switch (Opcode) { default: break; case ISD::SIGN_EXTEND: - return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT); case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::TRUNCATE: - return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - const uint64_t zero[] = {0, 0}; // No compile time operations on ppcf128. if (VT == MVT::ppcf128) break; - APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); return getConstantFP(apf, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(Val.bitsToFloat(), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, APInt api(VT.getSizeInBits(), 2, x); return getConstant(api, VT); } - case ISD::BIT_CONVERT: + case ISD::BITCAST: if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) @@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return Operand.getNode()->getOperand(0); } break; - case ISD::BIT_CONVERT: + case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() - && "Cannot BIT_CONVERT between types of different sizes!"); + && "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. 
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x) - return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; @@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { // Don't CSE flag producing nodes + if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); @@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + // Verify that the shift amount VT is bit enough to hold valid shift + // amounts. This catches things like trying to shift an i1024 value by an + // i8, which is easy to fall into in generic code that uses + // TLI.getShiftAmount(). + assert(N2.getValueType().getSizeInBits() >= + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to // handle them. Since we know the size of the shift has to be less than the @@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getConstant(ShiftedVal.trunc(ElementSize), VT); } break; - case ISD::EXTRACT_SUBVECTOR: - if (N1.getValueType() == VT) // Trivial extraction. - return N1; + case ISD::EXTRACT_SUBVECTOR: { + SDValue Index = N2; + if (VT.isSimple() && N1.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + "Extract subvector VTs must be a vectors!"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Extract subvector must be from larger vector to smaller vector!"); + + if (isa<ConstantSDNode>(Index.getNode())) { + assert((VT.getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= N1.getValueType().getVectorNumElements()) + && "Extract subvector overflow!"); + } + + // Trivial extraction. + if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + return N1; + } break; } + } if (N1C) { if (N2C) { @@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize this node if possible. 
SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); @@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; - case ISD::BIT_CONVERT: + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be a vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + "Insert subvector must be from smaller vector to larger vector!"); + if (isa<ConstantSDNode>(Index.getNode())) { + assert((N2.getValueType().getVectorNumElements() + + cast<ConstantSDNode>(Index.getNode())->getZExtValue() + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + return N2; + } + break; + } + case ISD::BITCAST: // Fold bit_convert nodes from a type to themselves. if (N1.getValueType() == VT) return N1; @@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // Memoize node if it doesn't produce a flag. SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); @@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } +/// SplatByte - Distribute ByteVal over NumBits bits. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + /// getMemsetValue - Vectorized representation of the memset value /// operand. 
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = APInt(NumBits, C->getZExtValue() & 255); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } + APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); if (VT.isInteger()) return DAG.getConstant(Val, VT); return DAG.getConstantFP(APFloat(Val), VT); } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Value = DAG.getNode(ISD::OR, dl, VT, - DAG.getNode(ISD::SHL, dl, VT, Value, - DAG.getConstant(Shift, - TLI.getShiftAmountTy())), - Value); - Shift <<= 1; + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = SplatByte(NumBits, 0x01); + Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } return Value; @@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } else @@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } - - // If we're optimizing for size, and there is a limit, bump the maximum number - // of operations inserted down to 4. This is a wild guess that approximates - // the size of a call to memcpy or memset (3 arguments + call). - if (Limit != ~0U) { - const Function *F = DAG.getMachineFunction().getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - Limit = 4; - } unsigned NumMemOps = 0; while (Size != 0) { @@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. + // TODO: In the AlwaysInline case, if the size is big then generate a loop + // rather than maybe a humongous number of loads and stores. 
const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); - + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this? EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, isVol, false, + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, isVol, false, - Align); + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Src, uint64_t Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), @@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign); + SrcPtrInfo.getWithOffset(SrcOff), isVol, + false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, Align); + DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; } @@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. if (Src.getOpcode() == ISD::UNDEF) return Chain; @@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector<EVT> MemOps; bool DstAlignCanChange = false; - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool NonScalarIntSafe = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); - if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, NonScalarIntSafe, false, DAG, TLI)) return SDValue(); @@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SmallVector<SDValue, 8> OutChains; uint64_t DstOff = 0; unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; - unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value = getMemsetValue(Src, VT, DAG, dl); + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
+ SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, isVol, false, 0); + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); OutChains.push_back(Store); - DstOff += VTSize; + DstOff += VT.getSizeInBits() / 8; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),Align, - isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - true, DstSV, DstSVOff, SrcSV, SrcSVOff); + true, DstPtrInfo, SrcPtrInfo); } // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc @@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Align, isVol, - false, DstSV, DstSVOff, SrcSV, SrcSVOff); + false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
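The getMemsetStores rewrite above computes the splat pattern once for the widest store type and, when the target reports that truncation is free, derives the narrower store values from it instead of recomputing them. A standalone sketch of that reuse on plain integers; the truncation predicate here is a stand-in for TLI.isTruncateFree and the widths are only examples.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <cstdint>

// Stand-in for TLI.isTruncateFree(): on most targets a narrower integer is
// just the low bits of the wider one, so truncation costs nothing.
static bool isTruncateFree() { return true; }

int main() {
  const uint8_t Byte = 0x5A;
  // Pattern for the largest store type (here 64 bits), computed once.
  const uint64_t Largest = uint64_t(Byte) * 0x0101010101010101ULL;

  // Narrower stores reuse the wide pattern by truncation when that is free;
  // otherwise the pattern is regenerated at the narrower width.
  uint32_t Store32 = isTruncateFree() ? uint32_t(Largest)
                                      : uint32_t(Byte) * 0x01010101u;
  uint16_t Store16 = isTruncateFree() ? uint16_t(Largest)
                                      : uint16_t(uint16_t(Byte) * 0x0101u);

  assert(Store32 == 0x5A5A5A5Au);
  assert(Store16 == 0x5A5Au);
  return 0;
}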
SDValue Result = TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff, SrcSV, SrcSVOff); + DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, - const Value *DstSV, uint64_t DstSVOff) { + MachinePointerInfo DstPtrInfo) { // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. @@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstSV, DstSVOff); + Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. SDValue Result = TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstSV, DstSVOff); + DstPtrInfo); if (Result.getNode()) return Result; - // Emit a library call. + // Emit a library call. const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, } SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, const Value* PtrVal, + SDValue Chain, SDValue Ptr, SDValue Cmp, + SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); } @@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); - // Check if the memory reference references a frame index - if (!PtrVal) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; @@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrVal, Flags, 0, + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); @@ -3785,7 +3914,6 @@ SDValue 
SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, } /// getMergeValues - Create a MERGE_VALUES node from the given operands. -/// Allowed to return something different (and simpler) if Simplify is true. SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl) { if (NumOps == 1) @@ -3803,18 +3931,18 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, - MemVT, srcValue, SVOff, Align, Vol, + MemVT, PtrInfo, Align, Vol, ReadMem, WriteMem); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - EVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { if (Align == 0) // Ensure that codegen never sees alignment 0 @@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (Vol) Flags |= MachineMemOperand::MOVolatile; MachineMemOperand *MMO = - MF.getMachineMemOperand(srcValue, Flags, SVOff, - MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } @@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || (Opcode <= INT_MAX && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); // Memoize the node unless it returns a flag. MemIntrinsicSDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(N, 0); } +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Ptr.getOperand(1)) || + !isa<FrameIndexSDNode>(Ptr.getOperand(0))) + return MachinePointerInfo(); + + int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack(FI, Offset+ + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". 
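The InferPointerInfo helper introduced here (a second overload taking an SDValue offset follows below) recovers a fixed-stack MachinePointerInfo from a bare pointer operand when it is a FrameIndex node or a (FrameIndex + constant) add, so callers that pass no pointer info still get usable alias information. A much-simplified standalone model of that pattern matching; the struct and node shapes are illustrative only, not the real SelectionDAG types.

// --- standalone illustration, not part of the patch ---
#include <cassert>

// Minimal stand-ins for FrameIndexSDNode / ConstantSDNode / ISD::ADD.
struct ToyNode {
  enum Kind { FrameIndex, Constant, Add, Other } kind;
  long long value;            // frame index or constant value
  const ToyNode *ops[2];      // operands (used only by Add)
};

struct ToyPointerInfo {
  bool isFixedStack;
  int frameIndex;
  long long offset;
};

static ToyPointerInfo inferPointerInfo(const ToyNode *Ptr, long long Off = 0) {
  // FI + Off: model it directly.
  if (Ptr->kind == ToyNode::FrameIndex)
    return {true, int(Ptr->value), Off};
  // (FI + C) + Off: fold the constant into the offset.
  if (Ptr->kind == ToyNode::Add &&
      Ptr->ops[0] && Ptr->ops[0]->kind == ToyNode::FrameIndex &&
      Ptr->ops[1] && Ptr->ops[1]->kind == ToyNode::Constant)
    return {true, int(Ptr->ops[0]->value), Off + Ptr->ops[1]->value};
  // Anything else: unknown pointer, no info.
  return {};
}

int main() {
  ToyNode FI{ToyNode::FrameIndex, 3, {nullptr, nullptr}};
  ToyNode C8{ToyNode::Constant, 8, {nullptr, nullptr}};
  ToyNode AddNode{ToyNode::Add, 0, {&FI, &C8}};

  ToyPointerInfo PI = inferPointerInfo(&AddNode, 4);
  assert(PI.isFixedStack && PI.frameIndex == 3 && PI.offset == 12);
  assert(!inferPointerInfo(&C8).isFixedStack);
  return 0;
}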
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) + return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.getOpcode() == ISD::UNDEF) + return InferPointerInfo(Ptr); + return MachinePointerInfo(); +} + + SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - MemVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + TBAAInfo); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { @@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - const Value *SV, int SVOffset, + MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); + PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, - const Value *SV, - int SVOffset, EVT MemVT, + MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); + PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + TBAAInfo); } + SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { @@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); return getLoad(AM, 
LD->getExtensionType(), OrigLoad.getValueType(), dl, - LD->getChain(), Base, Offset, LD->getSrcValue(), - LD->getSrcValueOffset(), LD->getMemoryVT(), + LD->getChain(), Base, Offset, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, int SVOffset, + SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + unsigned Alignment, const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, - Val.getValueType().getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, + Val.getValueType().getStoreSize(), Alignment, + TBAAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, } SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, - SDValue Ptr, const Value *SV, - int SVOffset, EVT SVT, - bool isVolatile, bool isNonTemporal, - unsigned Alignment) { + SDValue Ptr, MachinePointerInfo PtrInfo, + EVT SVT,bool isVolatile, bool isNonTemporal, + unsigned Alignment, + const MDNode *TBAAInfo) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); - // Check if the memory reference references a frame index - if (!SV) - if (const FrameIndexSDNode *FI = - dyn_cast<const FrameIndexSDNode>(Ptr.getNode())) - SV = PseudoSourceValue::getFixedStack(FI->getIndex()); - - MachineFunction &MF = getMachineFunction(); unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, + TBAAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDNode *N; SDVTList VTs = getVTList(VT); - if (VT != MVT::Flag) { + if (VT != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; @@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
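The trailing comment above ("Memoize the node unless it returns a flag") names a pattern this file repeats in several of the hunks here: unless the node produces glue, build a FoldingSetNodeID from the opcode, value types and operands, and return the existing node if one is already in the CSE map. A toy sketch of that uniquing discipline, using std::map as a stand-in for the FoldingSet; all names below are illustrative and leak-freedom is ignored for brevity.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <map>
#include <utility>
#include <vector>

struct ToyNode { unsigned Opcode; std::vector<int> Operands; };

// CSE map keyed by everything that identifies a node.
using NodeKey = std::pair<unsigned, std::vector<int>>;
static std::map<NodeKey, ToyNode*> CSEMap;

// getNode-style factory: reuse an equivalent node when one already exists,
// mirroring "if (VT != MVT::Glue) { build ID; FindNodeOrInsertPos; }".
static ToyNode *getNode(unsigned Opcode, std::vector<int> Operands,
                        bool ProducesGlue = false) {
  NodeKey Key(Opcode, Operands);
  if (!ProducesGlue) {
    auto It = CSEMap.find(Key);
    if (It != CSEMap.end())
      return It->second;                 // structurally identical node exists
  }
  ToyNode *N = new ToyNode{Opcode, std::move(Operands)};
  if (!ProducesGlue)
    CSEMap[Key] = N;                     // glue-producing nodes stay unique
  return N;
}

int main() {
  ToyNode *A = getNode(1, {10, 20});
  ToyNode *B = getNode(1, {10, 20});
  ToyNode *C = getNode(1, {10, 20}, /*ProducesGlue=*/true);
  assert(A == B && A != C);
  return 0;
}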
SDNode *N; - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, } AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifySDNode(N); #endif return SDValue(N, 0); } @@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, unsigned NumOps) { // If an identical node already exists, use it. void *IP = 0; - if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { + if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) @@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, const SDValue *Ops, unsigned NumOps) { - bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP; + void *IP = 0; if (DoCSE) { FoldingSetNodeID ID; @@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AllNodes.push_back(N); #ifndef NDEBUG - VerifyNode(N); + VerifyMachineNode(N); #endif return N; } @@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; @@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { SD->setHasDebugValue(true); } +/// TransferDbgValues - Transfer SDDbgValues. 
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { + if (From == To || !From.getNode()->getHasDebugValue()) + return; + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode); + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end(); + I != E; ++I) { + SDDbgValue *Dbg = *I; + if (Dbg->getKind() == SDDbgValue::SDNODE) { + SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->getOffset(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } + } + for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(), + E = ClonedDVs.end(); I != E; ++I) + AddDbgValue(*I, ToNode, false); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, + const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { @@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { namespace { struct EVTArray { std::vector<EVT> VTs; - + EVTArray() { VTs.reserve(MVT::LAST_VALUETYPE); for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) @@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) { sys::SmartScopedLock<true> Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && "Value type out of range!"); return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } @@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const { /// reachesChainWithoutSideEffects - Return true if this operand (which must /// be a chain) reaches the specified operand without crossing any -/// side-effecting instructions. In practice, this looks through token -/// factors and non-volatile loads. In order to remain efficient, this only -/// looks a couple of nodes in, it does not do an exhaustive search. +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth) const { if (*this == Dest) return true; @@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, if (Depth == 0) return false; // If this is a token factor, all inputs to the TF happen in parallel. If any - // of the operands of the TF reach dest, then we can do the xform. + // of the operands of the TF does not reach dest, then we cannot do the xform. if (getOpcode() == ISD::TokenFactor) { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) - return true; - return false; + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; } // Loads don't have side effects, look through them. 
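The reachesChainWithoutSideEffects change above flips a quantifier: for a TokenFactor, every incoming chain path must now reach the destination without side effects, where the old code accepted the node as soon as any one path did. A toy model of why the "all" form is the safe one; the node shape is illustrative, not the real SDNode.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <vector>

struct ToyChainNode {
  bool IsTokenFactor;
  bool HasSideEffect;                        // e.g. a store
  std::vector<const ToyChainNode*> Operands; // incoming chain operands
};

static bool reachesWithoutSideEffects(const ToyChainNode *N,
                                      const ToyChainNode *Dest,
                                      unsigned Depth = 2) {
  if (N == Dest) return true;
  if (Depth == 0) return false;
  if (N->IsTokenFactor) {
    // All inputs to a TokenFactor happen in parallel: one side-effecting
    // path is enough to spoil the transformation, so require them all.
    for (const ToyChainNode *Op : N->Operands)
      if (!reachesWithoutSideEffects(Op, Dest, Depth - 1))
        return false;
    return true;
  }
  if (N->HasSideEffect) return false;
  return !N->Operands.empty() &&
         reachesWithoutSideEffects(N->Operands[0], Dest, Depth - 1);
}

int main() {
  ToyChainNode Dest{false, false, {}};
  ToyChainNode Store{false, true, {&Dest}};       // side-effecting path
  ToyChainNode TF{true, false, {&Dest, &Store}};  // parallel chains
  // The old "any operand reaches Dest" rule would have accepted TF here.
  assert(!reachesWithoutSideEffects(&TF, &Dest));
  return 0;
}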
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP"; case ISD::ConstantPool: return "ConstantPool"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; @@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; @@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BIT_CONVERT: return "bit_convert"; + case ISD::BITCAST: return "bit_convert"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - if (G && R->getReg() && - TargetRegisterInfo::isPhysicalRegister(R->getReg())) { - OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg()); - } else { - OS << " %reg" << R->getReg(); - } + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) OS << ", " << AM; - + OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { OS << "<" << *M->getMemOperand() << ">"; @@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, const SelectionDAG *G, unsigned depth, - unsigned indent) + unsigned indent) { if (depth == 0) return; @@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, unsigned depth) const { printrWithDepthHelper(OS, this, G, depth, 0); -} +} void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. @@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. 
dumprWithDepth(G, 100); -} +} static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) @@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load /// is loading from. -bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const { if (LD->getChain() != Base->getChain()) return false; @@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); - if (V && (V->getSExtValue() == Dist*Bytes)) - return true; - } + + // Handle X+C + if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; const GlobalValue *GV1 = NULL; const GlobalValue *GV2 = NULL; @@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t FrameOffset = 0; if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && + } else if (isBaseWithConstantOffset(Ptr) && isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); FrameOffset = Ptr.getConstantOperandVal(1); } if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); @@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, if (OpVal.getOpcode() == ISD::UNDEF) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize). + SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). zextOrTrunc(sz) << BitPos; else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; @@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, while (sz > 8) { unsigned HalfSize = sz / 2; - APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize); - APInt LowValue = APInt(SplatValue).trunc(HalfSize); - APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize); - APInt LowUndef = APInt(SplatUndef).trunc(HalfSize); + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); // If the two halves do not match (ignoring undef bits), stop here. 
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || @@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N, // If this node has already been checked, don't check it again. if (Checked.count(N)) return; - + // If a node has already been visited on this depth-first walk, reject it as // a cycle. if (!Visited.insert(N)) { @@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N, errs() << "Detected cycle in SelectionDAG\n"; abort(); } - + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); - + Checked.insert(N); Visited.erase(N); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e65744592c8b..452f5614b7bf 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -43,9 +44,8 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" @@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +// Limit the width of DAG chains. This is important in general to prevent +// prevent DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach, and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. It easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static cl::opt<unsigned> +MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), + cl::init(64), cl::Hidden); + static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT); - + /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); - + assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } if (TLI.isBigEndian()) @@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); @@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); return SDValue(); @@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; - + // Handle a multi-element vector. if (NumParts > 1) { EVT IntermediateVT, RegisterVT; @@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); assert(RegisterVT == Parts[0].getValueType() && "Part type doesn't match part!"); - + // Assemble the parts into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); if (NumIntermediates == NumParts) { @@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } - + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, ValueVT, &Ops[0], NumIntermediates); } - + // There is now one part, held in Val. Correct it to match ValueVT. PartVT = Val.getValueType(); - + if (PartVT == ValueVT) return Val; - + if (PartVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a @@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getIntPtrConstant(0)); - } - + } + // Vector/Vector bitcast. 
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val); + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } - + assert(ValueVT.getVectorElementType() == PartVT && ValueVT.getVectorNumElements() == 1 && "Only trivial scalar-to-vector conversions should get here!"); @@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT); - + /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. @@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); - + // Handle the vector case separately. if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); - + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { assert(PartVT.isInteger() && ValueVT.isInteger() && - "Unknown mismatch!"); + "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. assert(NumParts == 1 && PartVT != ValueVT); - Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. assert(PartVT.isInteger() && ValueVT.isInteger() && @@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. - Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL, + Parts[0] = DAG.getNode(ISD::BITCAST, DL, EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); @@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, ThisVT, Part0, DAG.getIntPtrConstant(0)); if (ThisBits == PartBits && ThisVT != PartVT) { - Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0); - Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1); + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); } } } @@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - + if (NumParts == 1) { if (PartVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. 
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { @@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, DAG.getIntPtrConstant(i))); - + for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); @@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); // FIXME: Use CONCAT for 2x -> 4x. - + //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else { @@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getIntPtrConstant(0)); } - + Parts[0] = Val; return; } - + // Handle a multi-element vector. EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; @@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); - + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, DAG.getIntPtrConstant(i)); } - + // Split the intermediate operands into legal parts. if (NumParts == NumIntermediates) { // If the register was not expanded, promote or copy the value, @@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, } Chain = P.getValue(1); + Parts[i] = P; // If the source register was virtual and if we know something about it, // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. 
- bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } + if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + !RegisterVT.isInteger() || RegisterVT.isVector() || + !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + continue; + + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[Regs[Part+i]]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; - Parts[i] = P; + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); } Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), @@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else { - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); - } + } else + DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { unsigned InReg = It->second; RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + resolveDanglingDebugInfo(V, N); + return N; } // Otherwise create a new SDValue and remember it. 
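The reworked getCopyFromRegs loop above picks the tightest AssertSext or AssertZext it can justify from the number of known sign bits or known leading zero bits of the live-out register. A standalone sketch of that mapping for a 32-bit register; the return values simply name the narrow integer width the assertion would use and are illustrative only.

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <utility>

// Given what is known about a 32-bit live-out value, return the kind of
// assertion ("sext"/"zext"/"none") and the narrowest width it holds from.
static std::pair<const char*, unsigned>
tightestAssert(unsigned NumSignBits, unsigned NumZeroBits) {
  const unsigned RegSize = 32;
  if (NumSignBits == RegSize)      return {"sext", 1};
  if (NumZeroBits >= RegSize - 1)  return {"zext", 1};
  if (NumSignBits > RegSize - 8)   return {"sext", 8};
  if (NumZeroBits >= RegSize - 8)  return {"zext", 8};
  if (NumSignBits > RegSize - 16)  return {"sext", 16};
  if (NumZeroBits >= RegSize - 16) return {"zext", 16};
  // (the real code also checks a 32-bit case, relevant for wider registers)
  return {"none", 0};
}

int main() {
  // A value with 25 known sign bits fits in an i8 after sign extension.
  assert(tightestAssert(25, 0).second == 8);
  // 24 known leading zeros mean the value zero-extends from an i8.
  assert(tightestAssert(1, 24).second == 8);
  // Nothing useful known: no assertion is added and the loop continues.
  assert(tightestAssert(1, 0).second == 0);
  return 0;
}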
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, false, 0); + // FIXME: better loc info would be nice. + Add, MachinePointerInfo(), false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){ if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) return false; } - + return true; } @@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. // For example, instead of something like: // cmp A, B // C = seteq @@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (BOp->hasOneUse() && + if (!TLI.isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, @@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // Insert the false branch. - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), - TLI.getPointerTy()); + // Determine the type of the test operands. + bool UsePtrType = false; + if (!TLI.isTypeLegal(VT)) + UsePtrType = true; + else { + for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) + if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + UsePtrType = true; + break; + } + } + if (UsePtrType) { + VT = TLI.getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + } - B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); + B.RegVT = VT; + B.Reg = FuncInfo.CreateReg(VT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), - B.Reg, ShiftOp); + B.Reg, Sub); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. 
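Two details of the bit-test switch lowering in the surrounding hunks are easy to misread. First, the UsePtrType loop above decides whether a case mask still fits in the switch value's type: shifting the mask right by the type's bit width must yield 0 or -1, which is exactly what the "+ 1 >= 2" comparison rejects. Second, in the visitBitTestCase hunk that follows, a single-bit mask is tested by comparing the shift amount against the mask's trailing-zero count instead of materializing the shifted bit. A standalone check of both facts (uses the GCC/Clang __builtin_ctzll intrinsic):

// --- standalone illustration, not part of the patch ---
#include <cassert>
#include <cstdint>

// Does Mask still fit in a value type of Bits bits?  After an arithmetic
// shift by Bits, a fitting mask collapses to 0 (unsigned) or -1 (signed),
// i.e. to an unsigned value of 0 or ~0, so "+ 1 >= 2" flags everything else.
static bool maskNeedsWiderType(int64_t Mask, unsigned Bits) {
  return (uint64_t)(Mask >> Bits) + 1 >= 2;
}

// For a mask with exactly one bit set, ((1 << X) & Mask) != 0 asks the same
// question as X == ctz(Mask), which is what the lowering now compares.
static bool singleBitTest(uint64_t Mask, unsigned X) {
  return X == (unsigned)__builtin_ctzll(Mask);
}

int main() {
  assert(!maskNeedsWiderType(0x00000000000000F0LL, 32)); // fits in i32
  assert( maskNeedsWiderType(0x0000000100000000LL, 32)); // needs ptr type
  assert(!maskNeedsWiderType(-16, 32));                  // sign-extends fine

  const uint64_t Mask = 1ULL << 13;
  for (unsigned X = 0; X != 64; ++X)
    assert(singleBitTest(Mask, X) == (((1ULL << X) & Mask) != 0));
  return 0;
}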
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } /// visitBitTestCase - this function produces one "bit test" -void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, - TLI.getPointerTy()); + EVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + Reg, VT); SDValue Cmp; if (CountPopulation_64(B.Mask) == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(ShiftOp.getValueType()), + TLI.getSetCCResultType(VT), ShiftOp, - DAG.getConstant(CountTrailingZeros_64(B.Mask), - TLI.getPointerTy()), + DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), ISD::SETEQ); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); + VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), + TLI.getSetCCResultType(VT), + AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - // TODO: If any two of the cases has the same destination, and if one value + // If any two of the cases has the same destination, and if one value // is the same as the other, but has one bit unset that the other has set, // use bit manipulation to do two compares at once. For example: // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 cases. + // TODO: Handle cases where CR.CaseBB != SwitchBB. + if (Size == 2 && CR.CaseBB == SwitchBB) { + Case &Small = *CR.Range.first; + Case &Big = *(CR.Range.second-1); + + if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { + const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue(); + const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue(); + + // Check that there is only one bit different. + if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && + (SmallValue | BigValue) == BigValue) { + // Isolate the common bit. + APInt CommonBit = BigValue & ~SmallValue; + assert((SmallValue | CommonBit) == BigValue && + CommonBit.countPopulation() == 1 && "Not a common bit?"); + + SDValue CondLHS = getValue(SV); + EVT VT = CondLHS.getValueType(); + DebugLoc DL = getCurDebugLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, VT)); + SDValue Cond = DAG.getSetCC(DL, MVT::i1, + Or, DAG.getConstant(BigValue, VT), + ISD::SETEQ); + + // Update successor info. + SwitchBB->addSuccessor(Small.BB); + SwitchBB->addSuccessor(Default); + + // Insert the true branch. 
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, + getControlRoot(), Cond, + DAG.getBasicBlock(Small.BB)); + + // Insert the false branch. + BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, + DAG.getBasicBlock(Default)); + + DAG.setRoot(BrCond); + return true; + } + } + } // Rearrange the case blocks so that the last one falls through if possible. if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) { @@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { } static APInt ComputeRange(const APInt &First, const APInt &Last) { - APInt LastExt(Last), FirstExt(First); uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - LastExt.sext(BitWidth); FirstExt.sext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, } BitTestBlock BTB(lowBound, cmpRange, SV, - -1U, (CR.CaseBB == SwitchBB), + -1U, MVT::Other, (CR.CaseBB == SwitchBB), CR.CaseBB, Default, BTC); if (CR.CaseBB == SwitchBB) @@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; @@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, return numCmps; } +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. + for (unsigned i = 0, e = JTCases.size(); i != e; ++i) + if (JTCases[i].first.HeaderBB == First) + JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. 
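// A minimal standalone sketch of the two-case merge performed above in
// handleSmallSwitchRange: two switch cases that share a destination and whose
// values differ in exactly one bit can be folded into a single compare, e.g.
// "if (X == 6 || X == 4)" -> "if ((X | 2) == 6)". This assumes only standard
// C++; the helper name canMergeCases is invented for illustration and is not
// part of the patch.
#include <bitset>
#include <cstdint>

// Mirrors the popcount/subset precondition checked in the patch and isolates
// the single differing bit that the OR sets.
static bool canMergeCases(uint64_t Small, uint64_t Big, uint64_t &CommonBit) {
  if (std::bitset<64>(Big).count() != std::bitset<64>(Small).count() + 1 ||
      (Small | Big) != Big)
    return false;
  CommonBit = Big & ~Small;
  return true;
}

int main() {
  uint64_t CommonBit = 0;
  bool Mergeable = canMergeCases(4, 6, CommonBit); // CommonBit becomes 2
  // Brute-force check of (X == 4 || X == 6) <=> ((X | CommonBit) == 6).
  bool Equivalent = true;
  for (uint64_t X = 0; X < 256 && Mergeable; ++X)
    Equivalent = Equivalent && ((X == 4 || X == 6) == ((X | CommonBit) == 6));
  return (Mergeable && Equivalent) ? 0 : 1;
}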
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) + if (BitTestCases[i].Parent == First) + BitTestCases[i].Parent = Last; +} + void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; @@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); - if (Ty->isVectorTy()) { - if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { - const VectorType *DestTy = cast<VectorType>(I.getType()); - const Type *ElTy = DestTy->getElementType(); - unsigned VL = DestTy->getNumElements(); - std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - } + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; } - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - visitBinary(I, ISD::FSUB); } @@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!I.getType()->isVectorTy() && - Op2.getValueType() != TLI.getShiftAmountTy()) { + + MVT ShiftTy = TLI.getShiftAmountTy(); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueType().getSizeInBits(); + DebugLoc DL = getCurDebugLoc(); + // If the operand is smaller than the shift count type, promote it. - EVT PTy = TLI.getPointerTy(); - EVT STy = TLI.getShiftAmountTy(); - if (STy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (STy.getSizeInBits() >= - Log2_32_Ceil(Op2.getValueType().getSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getShiftAmountTy(), Op2); - // Otherwise we'll need to temporarily settle for some other - // convenient type; type legalization will make adjustments as - // needed. - else if (PTy.bitsLT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), Op2); - else if (PTy.bitsGT(Op2.getValueType())) - Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), Op2); + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. 
Type legalization will make adjustments once the shiftee is split. + else + Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), @@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) { EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is - // either a BIT_CONVERT or a no-op. + // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), DestVT, N)); // convert types. else setValue(&I, N); // noop cast. @@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } else { StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts < SrcNumElts) + StartIdx[Input] + MaskNumElts <= SrcNumElts) RangeUse[Input] = 1; // Extract from a multiple of the mask length. } } @@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); @@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, - I.idx_begin(), I.idx_end()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end()); SmallVector<EVT, 4> ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); @@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameInfo()->getStackAlignment(); + unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (I.isVolatile()) + if (I.isVolatile() || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(SV)) { + else if (AA->pointsToConstantMemory( + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. 
Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); - for (unsigned i = 0; i != NumValues; ++i) { + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). + if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, - isNonTemporal, Alignment); + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, Alignment, TBAAInfo); Values[i] = L; - Chains[i] = L.getValue(1); + Chains[ChainI] = L.getValue(1); } if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); else @@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector<SDValue, 4> Chains(NumValues); + SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), + NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); - - for (unsigned i = 0; i != NumValues; ++i) { + const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + Root = Chain; + ChainI = 0; + } SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - Chains[i] = DAG.getStore(Root, getCurDebugLoc(), - SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, - isNonTemporal, Alignment); - } - - DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), - MVT::Other, &Chains[0], NumValues)); + SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, TBAAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], ChainI); + ++SDNodeOrder; + AssignOrderingToNode(StoreNode.getNode()); + DAG.setRoot(StoreNode); } /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC @@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. - if (!IsTgtIntrinsic) + if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. @@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), VTs, &Ops[0], Ops.size(), - Info.memVT, Info.ptrVal, Info.offset, + Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { @@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); + Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, DAG.getConstant(0x3f800000, MVT::i32)); - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); } /// GetExponent - Get the exponent: @@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5); // Add the exponent into the result in integer domain. 
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: // @@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); // Add the exponent into the result in integer domain. SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: // @@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl, + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); // Add the exponent into the result in integer domain. SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, TwoToFracPartOfX, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14); + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); } } else { // No special expansion. @@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); @@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op = getValue(I.getArgOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. 
SDValue Exp = GetExponent(DAG, Op1, TLI, dl); @@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5); + SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { // For floating-point precision of 12: @@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7); + SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 // For floating-point precision of 18: @@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, 
MVT::i32, t13); + SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); SDValue TwoToFractionalPartOfX = DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - result = DAG.getNode(ISD::BIT_CONVERT, dl, + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, TwoToFractionalPartOfX); } } else { @@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); + // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) @@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. // Use this info directly. - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); Reg = TRI->getFrameRegister(MF); Offset = FuncInfo.getByValArgumentFrameIndex(Arg); + // If byval argument ofset is not recorded then ignore this. + if (!Offset) + Reg = 0; } if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) { Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); if (PR) @@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } if (!Reg) { + // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI == FuncInfo.ValueMap.end()) - return false; - Reg = VMI->second; + if (VMI != FuncInfo.ValueMap.end()) + Reg = VMI->second; } - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + if (!Reg && N.getNode()) { + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) { + Reg = TRI->getFrameRegister(MF); + Offset = FINode->getIndex(); + } + } + + if (!Reg) + return false; + MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); @@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, } // VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) -#define setjmp_undefined_for_visual_studio -#undef setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc #endif /// visitIntrinsicCall - Lower the call to the specified intrinsic function. 
If @@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::memset: { @@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0)); + MachinePointerInfo(I.getArgOperand(0)))); return 0; } case Intrinsic::memmove: { @@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); - - // If the source and destination are known to not be aliases, we can - // lower memmove as memcpy. - uint64_t Size = -1ULL; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3)) - Size = C->getZExtValue(); - if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == - AliasAnalysis::NoAlias) { - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getArgOperand(0), 0, - I.getArgOperand(1), 0)); - return 0; - } - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getArgOperand(0), 0, I.getArgOperand(1), 0)); + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)))); return 0; } case Intrinsic::dbg_declare: { @@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - SDDbgValue*SDV = - DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; } @@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (N.getNode()) { // Parameters are handled specially. - bool isParameter = + bool isParameter = DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); @@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Byval parameter. We have a frame index at this point. SDV = DAG.getDbgValue(Variable, FINode->getIndex(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. This might be a // parameter of a callee function that got inlined, for example. + DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } } else if (AI) SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), 0, dl, SDNodeOrder); - else + else { // Can't do anything with other non-AI cases yet. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return 0; + } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { - // If Address is an arugment then try to emits its dbg value using - // virtual register info from the FuncInfo.ValueMap. Otherwise add undef - // to help track missing debug info. + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) { - SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()), - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getDbgValue(Variable, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, 0, false); + return 0; + } + } + } + DEBUG(dbgs() << "Dropping debug info for " << DI); } } return 0; @@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N.getResNo(), Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } - } else if (isa<PHINode>(V) && !V->use_empty() ) { + } else if (!V->use_empty() ) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later. DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); DanglingDebugInfoMap[V] = DDI; } else { // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter; we need this fallback. - SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()), - Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, 0, false); + // data available is an unreferenced parameter. + DEBUG(dbgs() << "Dropping debug info for " << DI); } } @@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (SI == FuncInfo.StaticAllocaMap.end()) return 0; // VLAs. 
int FI = SI->second; - + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo()) MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); @@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, - getRoot(), - getValue(I.getArgOperand(0)))); + getRoot(), getValue(I.getArgOperand(0)))); + return 0; + } + case Intrinsic::eh_sjlj_dispatch_setup: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa<ConstantSDNode>(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return 0; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. + DebugLoc dl = getCurDebugLoc(); + SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI.getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + DAG.getConstant(NewIntrinsic, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. 
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, - PseudoSourceValue::getFixedStack(FI), - 0, true, false, 0); + MachinePointerInfo::getFixedStack(FI), + true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; + unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); - DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.getVTList(MVT::Other), + &Ops[0], 4, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ return 0; } - case Intrinsic::memory_barrier: { SDValue Ops[6]; Ops[0] = getRoot(); @@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)), - I.getArgOperand(0)); + MachinePointerInfo(I.getArgOperand(0))); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; + int DemoteStackIdx = -100; if (!CanLowerReturn) { uint64_t TySize = TLI.getTargetData()->getTypeAllocSize( @@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, unsigned Align = TLI.getTargetData()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, false, 1); + Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), + false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); - + // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
SmallVector<EVT, 4> RetTys; @@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, EVT VT = RetTys[I]; EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); - + SDValue ReturnValue = getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); @@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, - Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, false /*nontemporal*/, 1 /* align=1 */); @@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { visitInlineAsm(&I); return; } - + + // See if any floating point values are being passed to this function. This is + // used to emit an undefined reference to fltused on Windows. + const FunctionType *FT = + cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + if (FT->isVarArg() && + !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + const Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<const Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (!i->isFloatingPointTy()) continue; + MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); + break; + } + } + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { @@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } } - + SDValue Callee; if (!RenameFn) Callee = getValue(I.getCalledValue()); @@ -5008,7 +5226,7 @@ public: /// contains the set of register corresponding to the operand. RegsForValue AssignedRegs; - explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info) + explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } @@ -5083,6 +5301,8 @@ private: } }; +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; + } // end llvm namespace. /// isAllocatableRegister - If the specified register is safe to allocate, @@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. 
- std::vector<SDISelAsmOperandInfo> ConstraintOperands; + SDISelAsmOperandInfoVector ConstraintOperands; std::set<unsigned> OutputRegs, InputRegs; - // Do a prepass over the constraints, canonicalizing them, and building up the - // ConstraintOperands list. - std::vector<InlineAsm::ConstraintInfo> - ConstraintInfos = IA->ParseConstraints(); - - bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI); - - SDValue Chain, Flag; - - // We won't need to flush pending loads if this asm doesn't touch - // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); + bool hasMemory = false; unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); EVT OpVT = MVT::Other; @@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { - // Strip bitcasts, if any. This mostly comes up for functions. - OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); - if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { @@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. + if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } } + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + // Second pass over the constraints: compute which constraint option to use // and assign registers to constraints that want a specific physreg. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; // If this is an output operand with a matching input operand, look up the @@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // error. if (OpInfo.hasMatchingInput()) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; - + if (OpInfo.ConstraintVT != Input.ConstraintVT) { if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // need to to provide an address for the memory input. 
if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { - assert(OpInfo.Type == InlineAsm::isInput && + assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) && "Can only indirectify direct input operands!"); // Memory operands really want the address of the value. If we don't have @@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0, + OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(SSFI), false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { GetRegistersForValue(OpInfo, OutputRegs, InputRegs); } - ConstraintInfos.clear(); - // Second pass - Loop over all of the operands, assigning virtual or physregs // to register class operands. for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { @@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); - // Remember the AlignStack bit as operand 3. - AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, - MVT::i1)); + // Remember the HasSideEffect and AlignStack bits as operand 3. + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { " don't know how to handle tied " "indirect register inputs"); } - + RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); @@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG, AsmNodeOperands); break; } - + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && "Unexpected number of operands"); @@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. 
- if (OpInfo.ConstraintType == TargetLowering::C_Other && - OpInfo.isIndirect) + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; if (OpInfo.ConstraintType == TargetLowering::C_Other) { @@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } - + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy() && @@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), - DAG.getVTList(MVT::Other, MVT::Flag), + DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0, + MachinePointerInfo(StoresToEmit[i].second), false, false, 0); OutChains.push_back(Val); } @@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; + MyFlags.VT = RegisterVT.getSimpleVT(); MyFlags.Used = isReturnValueUsed; if (RetSExt) MyFlags.Flags.setSExt(); @@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerCall emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerCall emitted a value with the wrong type!"); }); @@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(InVals[i].getNode() && "LowerFormalArguments emitted a null value!"); - assert(Ins[i].VT == InVals[i].getValueType() && + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && "LowerFormalArguments emitted a value with the wrong type!"); } }); @@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Note down frame index for byval arguments. 
if (I->hasByValAttr() && !ArgValues.empty()) - if (FrameIndexSDNode *FI = + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5f400e9c83ac..a1a70c394a51 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -258,15 +258,16 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, bool E, + unsigned Rg, EVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): - First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E), + First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), Parent(P), Default(D), Cases(C) { } APInt First; APInt Range; const Value *SValue; unsigned Reg; + EVT RegVT; bool Emitted; MachineBasicBlock *Parent; MachineBasicBlock *Default; @@ -347,7 +348,7 @@ public: SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - + void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); @@ -398,6 +399,10 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the + /// references that ned to refer to the last resulting block. + void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + private: // Terminator instructions. void visitRet(const ReturnInst &I); @@ -431,7 +436,8 @@ public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); - void visitBitTestCase(MachineBasicBlock* NextMBB, + void visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 66cb5ceb09e5..62ebc81ef86e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -43,6 +43,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -53,8 +54,17 @@ using namespace llvm; STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +#ifndef NDEBUG +STATISTIC(NumBBWithOutOfOrderLineInfo, + "Number of blocks with out of order line number info"); +STATISTIC(NumMBBWithOutOfOrderLineInfo, + "Number of machine blocks with out of order line number info"); +#endif + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " @@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const 
TargetMachine &tm, CodeGenOpt::Level OL) : +SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, + CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), - DAGSize(0) -{} + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); + } SelectionDAGISel::~SelectionDAGISel() { delete SDB; @@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { static bool FunctionCallsSetJmp(const Function *F) { const Module *M = F->getParent(); static const char *ReturnsTwiceFns[] = { + "_setjmp", "setjmp", "sigsetjmp", "setjmp_syscall", @@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) { #undef NUM_RETURNS_TWICE_FNS } +/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { + // Loop for blocks with phi nodes. + for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + PHINode *PN = dyn_cast<PHINode>(BB->begin()); + if (PN == 0) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (CE == 0 || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. + BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. + SplitCriticalEdge(Pred->getTerminator(), + GetSuccessorNumber(Pred, BB), SDISel, true); + goto ReprocessBlock; + } + } +} + bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && @@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); @@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) + if (LI->second) LiveInMap.insert(std::make_pair(LI->first, LI->second)); // Insert DBG_VALUE instructions for function arguments to the entry block. 
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (LDI != LiveInMap.end()) { MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = + const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); unsigned Offset = MI->getOperand(1).getImm(); // Def is never a terminator here, so it is ok to increment InsertPos. - BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), + BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) .addReg(LDI->second, RegState::Debug) .addImm(Offset).addMetadata(Variable); + + // If this vreg is directly copied into an exported register then + // that COPY instructions also need DBG_VALUE, if it is the only + // user of LDI->second. + MachineInstr *CopyUseMI = NULL; + for (MachineRegisterInfo::use_iterator + UI = RegInfo->use_begin(LDI->second); + MachineInstr *UseMI = UI.skipInstruction();) { + if (UseMI->isDebugValue()) continue; + if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { + CopyUseMI = UseMI; continue; + } + // Otherwise this is another use or second copy use. + CopyUseMI = NULL; break; + } + if (CopyUseMI) { + MachineInstr *NewMI = + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII.get(TargetOpcode::DBG_VALUE)) + .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) + .addImm(Offset).addMetadata(Variable); + EntryMBB->insertAfter(CopyUseMI, NewMI); + } } } @@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - // Operand 1 of an inline asm instruction indicates whether the asm - // needs stack or not. - if ((II->isInlineAsm() && II->getOperand(1).getImm()) || - (TID.isCall() && !TID.isReturn())) { + if ((TID.isCall() && !TID.isReturn()) || + II->isStackAligningInlineAsm()) { MFI->setHasCalls(true); goto done; } @@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); + return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { - DestReg -= TargetRegisterInfo::FirstVirtualRegister; - if (DestReg >= FuncInfo->LiveOutRegInfo.size()) - FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FuncInfo->LiveOutRegInfo.grow(DestReg); FunctionLoweringInfo::LiveOutInfo &LOI = FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; @@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. + MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - FuncInfo->MBB = Scheduler->EmitSchedule(); + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); FuncInfo->InsertPt = Scheduler->InsertPos; } + // If the block was split, make sure we update any references that are used to + // update PHI nodes later on. + if (FirstMBB != LastMBB) + SDB->UpdateSplitBlock(FirstMBB, LastMBB); + // Free the scheduler state. 
{ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, @@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() { DEBUG(errs() << "===== Instruction selection begins:\n"); PreprocessISelDAG(); - + // Select target instructions for the DAG. { // Number all nodes with a topological order and set DAGSize. DAGSize = CurDAG->AssignTopologicalOrder(); - + // Create a dummy node (which is not added to allnodes), that adds // a reference to the root node, preventing it from being deleted, // and tracking any changes of the root. HandleSDNode Dummy(CurDAG->getRoot()); ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); ++ISelPosition; - + // The AllNodes list is now topological-sorted. Visit the // nodes by starting at the end of the list (the root of the // graph) and preceding back toward the beginning (the entry @@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() { // makes it theoretically possible to disable the DAGCombiner. if (Node->use_empty()) continue; - + SDNode *ResNode = Select(Node); - + // FIXME: This is pretty gross. 'Select' should be changed to not return // anything at all and this code should be nuked with a tactical strike. - + // If node should not be replaced, continue with the next one. if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. if (ResNode) ReplaceUses(Node, ResNode); - + // If after the replacement this node is not used any more, // remove this dead node. if (Node->use_empty()) { // Don't delete EntryToken, etc. @@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() { CurDAG->RemoveDeadNode(Node, &ISU); } } - + CurDAG->setRoot(Dummy.getValue()); - } + } DEBUG(errs() << "===== Instruction selection ends:\n"); @@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() { } } + + + +bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + FastISel *FastIS) { + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) return false; + + // Figure out which vreg this is going into. + unsigned LoadReg = FastIS->getRegForValue(LI); + assert(LoadReg && "Load isn't already assigned a vreg? "); + + // Check to see what the uses of this vreg are. If it has no uses, or more + // than one use (at the machine instr level) then we can't fold it. + MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg); + if (RI == RegInfo->reg_end()) + return false; + + // See if there is exactly one use of the vreg. If there are multiple uses, + // then the instruction got lowered to multiple machine instructions or the + // use of the loaded value ended up being multiple operands of the result, in + // either case, we can't fold this. + MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI; + if (PostRI != RegInfo->reg_end()) + return false; + + assert(RI.getOperand().isUse() && + "The only use of the vreg must be a use, we haven't emitted the def!"); + + MachineInstr *User = &*RI; + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes, make + // sure they get inserted in a logical place before the new instruction. + FuncInfo->InsertPt = User; + FuncInfo->MBB = User->getParent(); + + // Ask the target to try folding the load. + return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI); +} + +#ifndef NDEBUG +/// CheckLineNumbers - Check if basic block instructions follow source order +/// or not. 
+static void CheckLineNumbers(const BasicBlock *BB) { + unsigned Line = 0; + unsigned Col = 0; + for (BasicBlock::const_iterator BI = BB->begin(), + BE = BB->end(); BI != BE; ++BI) { + const DebugLoc DL = BI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} + +/// CheckLineNumbers - Check if machine basic block instructions follow source +/// order or not. +static void CheckLineNumbers(const MachineBasicBlock *MBB) { + unsigned Line = 0; + unsigned Col = 0; + for (MachineBasicBlock::const_iterator MBI = MBB->begin(), + MBE = MBB->end(); MBI != MBE; ++MBI) { + const DebugLoc DL = MBI->getDebugLoc(); + if (DL.isUnknown()) continue; + unsigned L = DL.getLine(); + unsigned C = DL.getCol(); + if (L < Line || (L == Line && C < Col)) { + ++NumMBBWithOutOfOrderLineInfo; + return; + } + Line = L; + Col = C; + } +} +#endif + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; @@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; +#ifndef NDEBUG + CheckLineNumbers(LLVMBB); +#endif FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Setup an EH landing-pad block. if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - + // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) + if (LLVMBB == &Fn.getEntryBlock()) { + for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); + DBI != DBE; ++DBI) { + if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) { + const DebugLoc DL = DI->getDebugLoc(); + SDB->setCurDebugLoc(DL); + break; + } + } LowerArguments(LLVMBB); + } // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { @@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) + if (FastIS->SelectInstruction(Inst)) { + // If fast isel succeeded, check to see if there is a single-use + // non-volatile load right before the selected instruction, and see if + // the load is used by the instruction. If so, try to fold it. + const Instruction *BeforeInst = 0; + if (Inst != Begin) + BeforeInst = llvm::prior(llvm::prior(BI)); + if (BeforeInst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS)) + --BI; // If we succeeded, don't re-select the load. continue; + } // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { @@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } + if (Begin != BI) + ++NumDAGBlocks; + else + ++NumFastIselBlocks; + // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. 
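The two CheckLineNumbers helpers above implement the same monotonicity test at the IR and machine level: walk the block's instructions, skip unknown locations, and flag the block as soon as a known (line, column) pair moves backwards. A standalone sketch of that test follows, using plain (line, column) pairs with line 0 standing in for an unknown DebugLoc; the function name is hypothetical.

#include <cstddef>
#include <utility>
#include <vector>

// True if any known source location in the sequence moves backwards.
bool hasOutOfOrderLineInfo(const std::vector<std::pair<unsigned, unsigned> > &Locs) {
  unsigned Line = 0, Col = 0;
  for (std::size_t i = 0, e = Locs.size(); i != e; ++i) {
    unsigned L = Locs[i].first, C = Locs[i].second;
    if (L == 0)
      continue;                      // stand-in for DL.isUnknown()
    if (L < Line || (L == Line && C < Col))
      return true;                   // went backwards in source order
    Line = L;
    Col = C;
  }
  return false;
}

In the patch the helpers only bump the NumBBWithOutOfOrderLineInfo / NumMBBWithOutOfOrderLineInfo statistics and are compiled solely under #ifndef NDEBUG, so they add no cost to release builds.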
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } delete FastIS; +#ifndef NDEBUG + for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end(); + MBI != MBE; ++MBI) + CheckLineNumbers(MBI); +#endif } void @@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() { FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); else - SDB->visitBitTestCase(SDB->BitTestCases[i].Default, + SDB->visitBitTestCase(SDB->BitTestCases[i], + SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], FuncInfo->MBB); @@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. @@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() { if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) Succs.push_back(SDB->SwitchCases[i].FalseBB); - // Emit the code. Note that this could result in ThisBB being split, so - // we need to check for updates. + // Emit the code. Note that this could result in FuncInfo->MBB being split. SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); - ThisBB = FuncInfo->MBB; + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. + MachineBasicBlock *ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can @@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return Ctor(this, OptLevel); } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { - return new ScheduleHazardRecognizer(); -} - //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// @@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc - Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 + Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); - if (InOps[e-1].getValueType() == MVT::Flag) - --e; // Don't process a flag operand if it is here. + if (InOps[e-1].getValueType() == MVT::Glue) + --e; // Don't process a glue operand if it is here. while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); @@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { } } - // Add the flag input back if present. + // Add the glue input back if present. 
if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of EVT::Flag value produced by the specified +/// findGlueUse - Return use of MVT::Glue value produced by the specified /// SDNode. /// -static SDNode *findFlagUse(SDNode *N) { +static SDNode *findGlueUse(SDNode *N) { unsigned FlagResNo = N->getNumValues()-1; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDUse &Use = I.getUse(); @@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // never find it. // // The Use may be -1 (unassigned) if it is a newly allocated node. This can - // happen because we scan down to newly selected nodes in the case of flag + // happen because we scan down to newly selected nodes in the case of glue // uses. if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) return false; - + // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. if (!Visited.insert(Use)) @@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // Ignore chain uses, they are validated by HandleMergeInputChains. if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains) continue; - + SDNode *N = Use->getOperand(i).getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) @@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // // * indicates nodes to be folded together. // - // If Root produces a flag, then it gets (even more) interesting. Since it - // will be "glued" together with its flag use in the scheduler, we need to + // If Root produces glue, then it gets (even more) interesting. Since it + // will be "glued" together with its glue use in the scheduler, we need to // check if it might reach N. // // [N*] // @@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // ^ / // // f / // // | / // - // [FU] // + // [GU] // // - // If FU (flag use) indirectly reaches N (the load), and Root folds N - // (call it Fold), then X is a predecessor of FU and a successor of - // Fold. But since Fold and FU are flagged together, this will create + // If GU (glue use) indirectly reaches N (the load), and Root folds N + // (call it Fold), then X is a predecessor of GU and a successor of + // Fold. But since Fold and GU are glued together, this will create // a cycle in the scheduling graph. - // If the node has flags, walk down the graph to the "lowest" node in the - // flagged set. + // If the node has glue, walk down the graph to the "lowest" node in the + // glueged set. EVT VT = Root->getValueType(Root->getNumValues()-1); - while (VT == MVT::Flag) { - SDNode *FU = findFlagUse(Root); - if (FU == NULL) + while (VT == MVT::Glue) { + SDNode *GU = findGlueUse(Root); + if (GU == NULL) break; - Root = FU; + Root = GU; VT = Root->getValueType(Root->getNumValues()-1); - - // If our query node has a flag result with a use, we've walked up it. If + + // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses // the chain, our WalkChainUsers predicate will not consider it. Because of // this, we cannot ignore chains in this predicate. 
IgnoreChains = false; } - + SmallPtrSet<SDNode*, 16> Visited; return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); @@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - + std::vector<EVT> VTs; VTs.push_back(MVT::Other); - VTs.push_back(MVT::Flag); + VTs.push_back(MVT::Glue); SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. - + unsigned Shift = 7; uint64_t NextBits; do { @@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { Val |= (NextBits&127) << Shift; Shift += 7; } while (NextBits & 128); - + return Val; } -/// UpdateChainsAndFlags - When a match is complete, this method updates uses of -/// interior flag and chain results to use the new flag and chain results. +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results. void SelectionDAGISel:: -UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputFlag, - const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, - bool isMorphNodeTo) { +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputGlue, + const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, + bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; - + ISelUpdater ISU(ISelPosition); // Now that all the normal results are replaced, we replace the chain and - // flag results if present. + // glue results if present. if (!ChainNodesMatched.empty()) { assert(InputChain.getNode() != 0 && "Matched input chains but didn't produce a chain"); @@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; - + // If this node was already deleted, don't look at it. if (ChainNode->getOpcode() == ISD::DELETED_NODE) continue; - + // Don't replace the results of the root node if we're doing a // MorphNodeTo. if (ChainNode == NodeToMatch && isMorphNodeTo) continue; - + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); - if (ChainVal.getValueType() == MVT::Flag) + if (ChainVal.getValueType() == MVT::Glue) ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); - + // If the node became dead and we haven't already seen it, delete it. if (ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } - - // If the result produces a flag, update any flag results in the matched - // pattern with the flag result. 
- if (InputFlag.getNode() != 0) { + + // If the result produces glue, update any glue results in the matched + // pattern with the glue result. + if (InputGlue.getNode() != 0) { // Handle any interior nodes explicitly marked. - for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { - SDNode *FRN = FlagResultNodesMatched[i]; - + for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = GlueResultNodesMatched[i]; + // If this node was already deleted, don't look at it. if (FRN->getOpcode() == ISD::DELETED_NODE) continue; - - assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && - "Doesn't have a flag result"); + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && + "Doesn't have a glue result"); CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputFlag, &ISU); - + InputGlue, &ISU); + // If the node became dead and we haven't already seen it, delete it. if (FRN->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) NowDeadNodes.push_back(FRN); } } - + if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); - + DEBUG(errs() << "ISEL: Match complete!\n"); } @@ -1392,17 +1589,17 @@ enum ChainResult { /// /// The walk we do here is guaranteed to be small because we quickly get down to /// already selected nodes "below" us. -static ChainResult +static ChainResult WalkChainUsers(SDNode *ChainedNode, SmallVectorImpl<SDNode*> &ChainedNodesInPattern, SmallVectorImpl<SDNode*> &InteriorChainedNodes) { ChainResult Result = CR_Simple; - + for (SDNode::use_iterator UI = ChainedNode->use_begin(), E = ChainedNode->use_end(); UI != E; ++UI) { // Make sure the use is of the chain, not some other value we produce. if (UI.getUse().getValueType() != MVT::Other) continue; - + SDNode *User = *UI; // If we see an already-selected machine node, then we've gone beyond the @@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode, if (User->isMachineOpcode() || User->getOpcode() == ISD::HANDLENODE) // Root of the graph. continue; - + if (User->getOpcode() == ISD::CopyToReg || User->getOpcode() == ISD::CopyFromReg || User->getOpcode() == ISD::INLINEASM || @@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode, if (!std::count(ChainedNodesInPattern.begin(), ChainedNodesInPattern.end(), User)) return CR_InducesCycle; - + // Otherwise we found a node that is part of our pattern. For example in: // x = load ptr // y = x+4 @@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + // If we found a TokenFactor, there are two cases to consider: first if the // TokenFactor is just hanging "below" the pattern we're matching (i.e. no // uses of the TF are in our pattern) we just want to ignore it. Second, @@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode, case CR_LeadsToInteriorNode: break; // Otherwise, keep processing. } - + // Okay, we know we're in the interesting interior case. The TokenFactor // is now going to be considered part of the pattern so that we rewrite its // uses (it may have uses that are not part of the pattern) with the @@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode, InteriorChainedNodes.push_back(User); continue; } - + return Result; } @@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. 
} - + // Okay, we have walked all the matched nodes and collected TokenFactor nodes // that we are interested in. Form our input TokenFactor node. SmallVector<SDValue, 3> InputChains; @@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, if (N->getOpcode() != ISD::TokenFactor) { if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) continue; - + // Otherwise, add the input chain. SDValue InChain = ChainNodesMatched[i]->getOperand(0); assert(InChain.getValueType() == MVT::Other && "Not a chain"); InputChains.push_back(InChain); continue; } - + // If we have a token factor, we want to add all inputs of the token factor // that are not part of the pattern we're matching. for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { @@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, InputChains.push_back(N->getOperand(op)); } } - + SDValue Res; if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), MVT::Other, &InputChains[0], InputChains.size()); -} +} /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: @@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be - // adding a chain) and the input could have flags and chains as well. + // adding a chain) and the input could have glue and chains as well. // In this case we need to shift the operands down. // FIXME: This is a horrible hack and broken in obscure cases, no worse // than the old isel though. - int OldFlagResultNo = -1, OldChainResultNo = -1; + int OldGlueResultNo = -1, OldChainResultNo = -1; unsigned NTMNumResults = Node->getNumValues(); - if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { - OldFlagResultNo = NTMNumResults-1; + if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { + OldGlueResultNo = NTMNumResults-1; if (NTMNumResults != 1 && Node->getValueType(NTMNumResults-2) == MVT::Other) OldChainResultNo = NTMNumResults-2; @@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } unsigned ResNumResults = Res->getNumValues(); - // Move the flag if needed. - if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && - (unsigned)OldFlagResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), SDValue(Res, ResNumResults-1)); - if ((EmitNodeInfo & OPFL_FlagOutput) != 0) + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults-1)); // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. if (Res != Node) CurDAG->ReplaceAllUsesWith(Node, Res); - + return Res; } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. 
-ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { // Accept if it is exactly the same as a previously recorded node. unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - return N == RecordedNodes[RecNo]; + return N == RecordedNodes[RecNo].first; } - + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; - + // Handle the case when VT is iPTR. return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI, unsigned ChildNo) { @@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering &TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; - + // Handle the case when VT is iPTR. 
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); return C != 0 && C->getSExtValue() == Val; } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::AND) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - + if (N->getOpcode() != ISD::OR) return false; - + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); } @@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, /// fail, set Result=true and return anything. If the current predicate is /// known to pass, set Result=false and return the MatcherIndex to continue /// with. If the current predicate is unknown, set Result=false and return the -/// MatcherIndex to continue with. +/// MatcherIndex to continue with. static unsigned IsPredicateKnownToFail(const unsigned char *Table, unsigned Index, SDValue N, bool &Result, SelectionDAGISel &SDISel, - SmallVectorImpl<SDValue> &RecordedNodes){ + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { switch (Table[Index++]) { default: Result = false; @@ -1782,21 +1980,21 @@ namespace { struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; - + /// NodeStack - The node stack when the scope was formed. SmallVector<SDValue, 4> NodeStack; - + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. unsigned NumRecordedNodes; - + /// NumMatchedMemRefs - The number of matched memref entries. unsigned NumMatchedMemRefs; - - /// InputChain/InputFlag - The current chain/flag - SDValue InputChain, InputFlag; + + /// InputChain/InputGlue - The current chain/glue + SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. - bool HasChainNodesMatched, HasFlagResultNodesMatched; + bool HasChainNodesMatched, HasGlueResultNodesMatched; }; } @@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } - + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); // Set up the node stack with NodeToMatch as the only node on the stack. @@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // MatchScopes - Scopes used when matching, if a match failure happens, this // indicates where to continue checking. 
SmallVector<MatchScope, 8> MatchScopes; - + // RecordedNodes - This is the set of nodes that have been recorded by the - // state machine. - SmallVector<SDValue, 8> RecordedNodes; - + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + // MatchedMemRefs - This is the set of MemRef's we've seen in the input // pattern. SmallVector<MachineMemOperand*, 2> MatchedMemRefs; - - // These are the current input chain and flag for use when generating nodes. + + // These are the current input chain and glue for use when generating nodes. // Various Emit operations change these. For example, emitting a copytoreg // uses and updates these. - SDValue InputChain, InputFlag; - + SDValue InputChain, InputGlue; + // ChainNodesMatched - If a pattern matches nodes that have input/output // chains, the OPC_EmitMergeInputChains operation is emitted which indicates // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. SmallVector<SDNode*, 3> ChainNodesMatched; - SmallVector<SDNode*, 3> FlagResultNodesMatched; - + SmallVector<SDNode*, 3> GlueResultNodesMatched; + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); errs() << '\n'); - + // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we // accelerate the first lookup (which is guaranteed to be hot) with the // OpcodeOffset table. unsigned MatcherIndex = 0; - + if (!OpcodeOffset.empty()) { // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) @@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; } - + while (1) { assert(MatcherIndex < TableSize && "Invalid index"); #ifndef NDEBUG @@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // determine immediately that the first check (or first several) will // immediately fail, don't even bother pushing a scope for them. unsigned FailIndex; - + while (1) { unsigned NumToSkip = MatcherTable[MatcherIndex++]; if (NumToSkip & 128) @@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FailIndex = 0; break; } - + FailIndex = MatcherIndex+NumToSkip; - + unsigned MatcherIndexOfPredicate = MatcherIndex; (void)MatcherIndexOfPredicate; // silence warning. - + // If we can't evaluate this predicate without pushing a scope (e.g. if // it is a 'MoveParent') or if the predicate succeeds on this node, we // push the scope and evaluate the full predicate chain. @@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, Result, *this, RecordedNodes); if (!Result) break; - + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " << "index " << MatcherIndexOfPredicate << ", continuing at " << FailIndex << "\n"); ++NumDAGIselRetries; - + // Otherwise, we know that this case of the Scope is guaranteed to fail, // move to the next case. MatcherIndex = FailIndex; } - + // If the whole scope failed to match, bail. if (FailIndex == 0) break; - + // Push a MatchScope which indicates where to go if the first child fails // to match. 
MatchScope NewEntry; @@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NewEntry.NumRecordedNodes = RecordedNodes.size(); NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); NewEntry.InputChain = InputChain; - NewEntry.InputFlag = InputFlag; + NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); - NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } - case OPC_RecordNode: + case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. - RecordedNodes.push_back(N); + SDNode *Parent = 0; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); continue; - + } + case OPC_RecordChild0: case OPC_RecordChild1: case OPC_RecordChild2: case OPC_RecordChild3: case OPC_RecordChild4: case OPC_RecordChild5: @@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (ChildNo >= N.getNumOperands()) break; // Match fails if out of range child #. - RecordedNodes.push_back(N->getOperand(ChildNo)); + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); continue; } case OPC_RecordMemRef: MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); continue; - - case OPC_CaptureFlagInput: - // If the current node has an input flag, capture it in InputFlag. + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. if (N->getNumOperands() != 0 && - N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) - InputFlag = N->getOperand(N->getNumOperands()-1); + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); continue; - + case OPC_MoveChild: { unsigned ChildNo = MatcherTable[MatcherIndex++]; if (ChildNo >= N.getNumOperands()) @@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeStack.push_back(N); continue; } - + case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); assert(!NodeStack.empty() && "Node stack imbalance!"); - N = NodeStack.back(); + N = NodeStack.back(); continue; - + case OPC_CheckSame: if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; continue; @@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); - if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, RecordedNodes)) break; continue; @@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOpcode: if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; continue; - + case OPC_CheckType: if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; continue; - + case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If the opcode matches, then we will execute this case. 
if (CurNodeOpcode == Opc) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " OpcodeSwitch from " << SwitchStart << " to " << MatcherIndex << "\n"); continue; } - + case OPC_SwitchType: { - MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + MVT CurNodeVT = N.getValueType().getSimpleVT(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (CaseSize & 128) CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; - - MVT::SimpleValueType CaseVT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy().SimpleTy; - + CaseVT = TLI.getPointerTy(); + // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) break; - + // Otherwise, skip over this case. MatcherIndex += CaseSize; } - + // If no cases matched, bail out. if (CaseSize == 0) break; - + // Otherwise, execute the case we found. DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); @@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckOrImm: if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; continue; - + case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have @@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NodeToMatch, OptLevel, true/*We validate our own chains*/)) break; - + continue; } case OPC_EmitInteger: { @@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); continue; } case OPC_EmitRegister: { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; - RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } - + case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. 
unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Imm = RecordedNodes[RecNo]; + SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); @@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); } - - RecordedNodes.push_back(Imm); + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); continue; } - + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. @@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); - + // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } - + // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. continue; } - + case OPC_EmitMergeInputChains: { assert(InputChain.getNode() == 0 && "EmitMergeInputChains should be the first chain producing node"); @@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumChains; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); - + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && - !RecordedNodes[RecNo].hasOneUse()) { + !RecordedNodes[RecNo].first.hasOneUse()) { ChainNodesMatched.clear(); break; } } - + // If the inner loop broke out, the match fails. if (ChainNodesMatched.empty()) break; // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - + if (InputChain.getNode() == 0) break; // Failed to merge. 
continue; } - + case OPC_EmitCopyToReg: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - + if (InputChain.getNode() == 0) InputChain = CurDAG->getEntryNode(); - + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), - DestPhysReg, RecordedNodes[RecNo], - InputFlag); - - InputFlag = InputChain.getValue(1); + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); continue; } - + case OPC_EmitNodeXForm: { unsigned XFormNo = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0)); continue; } - + case OPC_EmitNode: case OPC_MorphNodeTo: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; @@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; VTs.push_back(VT); } - + if (EmitNodeInfo & OPFL_Chain) VTs.push_back(MVT::Other); - if (EmitNodeInfo & OPFL_FlagOutput) - VTs.push_back(MVT::Flag); - + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + // This is hot code, so optimize the two most common cases of 1 and 2 // results. SDVTList VTList; @@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); - Ops.push_back(RecordedNodes[RecNo]); + Ops.push_back(RecordedNodes[RecNo].first); } - + // If there are variadic operands to add, handle them now. if (EmitNodeInfo & OPFL_VariadicInfo) { // Determine the start index to copy from. @@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && "Invalid variadic node"); - // Copy all of the variadic operands, not including a potential flag + // Copy all of the variadic operands, not including a potential glue // input. for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); i != e; ++i) { SDValue V = NodeToMatch->getOperand(i); - if (V.getValueType() == MVT::Flag) break; + if (V.getValueType() == MVT::Glue) break; Ops.push_back(V); } } - - // If this has chain/flag inputs, add them. + + // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) - Ops.push_back(InputFlag); - + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + Ops.push_back(InputGlue); + // Create the node. SDNode *Res = 0; if (Opcode != OPC_MorphNodeTo) { @@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), VTList, Ops.data(), Ops.size()); - - // Add all the non-flag/non-chain results to the RecordedNodes list. + + // Add all the non-glue/non-chain results to the RecordedNodes list. 
for (unsigned i = 0, e = VTs.size(); i != e; ++i) { - if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; - RecordedNodes.push_back(SDValue(Res, i)); + if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), + (SDNode*) 0)); } - + } else { Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), EmitNodeInfo); } - - // If the node had chain/flag results, update our notion of the current - // chain and flag. - if (EmitNodeInfo & OPFL_FlagOutput) { - InputFlag = SDValue(Res, VTs.size()-1); + + // If the node had chain/glue results, update our notion of the current + // chain and glue. + if (EmitNodeInfo & OPFL_GlueOutput) { + InputGlue = SDValue(Res, VTs.size()-1); if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-2); } else if (EmitNodeInfo & OPFL_Chain) InputChain = SDValue(Res, VTs.size()-1); - // If the OPFL_MemRefs flag is set on this node, slap all of the + // If the OPFL_MemRefs glue is set on this node, slap all of the // accumulated memrefs onto it. // // FIXME: This is vastly incorrect for patterns with multiple outputs @@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, cast<MachineSDNode>(Res) ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); } - + DEBUG(errs() << " " << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); errs() << "\n"); - + // If this was a MorphNodeTo then we're completely done! if (Opcode == OPC_MorphNodeTo) { - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, true); + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, true); return Res; } - + continue; } - - case OPC_MarkFlagResults: { + + case OPC_MarkGlueResults: { unsigned NumNodes = MatcherTable[MatcherIndex++]; - - // Read and remember all the flag-result nodes. + + // Read and remember all the glue-result nodes. for (unsigned i = 0; i != NumNodes; ++i) { unsigned RecNo = MatcherTable[MatcherIndex++]; if (RecNo & 128) RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); - FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); } continue; } - + case OPC_CompleteMatch: { // The match has been completed, and any new nodes (if any) have been // created. 
Patch up references to the matched dag to use the newly @@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned ResSlot = MatcherTable[MatcherIndex++]; if (ResSlot & 128) ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); - + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); - SDValue Res = RecordedNodes[ResSlot]; - + SDValue Res = RecordedNodes[ResSlot].first; + assert(i < NodeToMatch->getNumValues() && NodeToMatch->getValueType(i) != MVT::Other && - NodeToMatch->getValueType(i) != MVT::Flag && + NodeToMatch->getValueType(i) != MVT::Glue && "Invalid number of results to complete!"); assert((NodeToMatch->getValueType(i) == Res.getValueType() || NodeToMatch->getValueType(i) == MVT::iPTR || @@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } - // If the root node defines a flag, add it to the flag nodes to update - // list. - if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) - FlagResultNodesMatched.push_back(NodeToMatch); - - // Update chain and flag uses. - UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, - InputFlag, FlagResultNodesMatched, false); - + // If the root node defines glue, add it to the glue nodes to update list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) + GlueResultNodesMatched.push_back(NodeToMatch); + + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, false); + assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); - + // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. return 0; } } - + // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. @@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - + DEBUG(errs() << " Continuing at " << MatcherIndex << "\n"); - + InputChain = LastScope.InputChain; - InputFlag = LastScope.InputFlag; + InputGlue = LastScope.InputGlue; if (!LastScope.HasChainNodesMatched) ChainNodesMatched.clear(); - if (!LastScope.HasFlagResultNodesMatched) - FlagResultNodesMatched.clear(); + if (!LastScope.HasGlueResultNodesMatched) + GlueResultNodesMatched.clear(); // Check to see what the offset is at the new MatcherIndex. If it is zero // we have reached the end of this scope, otherwise we have another child @@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, LastScope.FailIndex = MatcherIndex+NumToSkip; break; } - + // End of this scope, pop it and try the next child in the containing // scope. 
MatchScopes.pop_back(); } } } - + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); - Msg << "Cannot yet select: "; - + Msg << "Cannot select: "; + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && N->getOpcode() != ISD::INTRINSIC_VOID) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 8313de5e32bb..76eb9453561e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -93,7 +93,7 @@ namespace llvm { static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); - if (VT == MVT::Flag) + if (VT == MVT::Glue) return "color=red,style=bold"; else if (VT == MVT::Other) return "color=blue,style=dashed"; @@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { raw_string_ostream O(s); O << "SU(" << SU->NodeNum << "): "; if (SU->getNode()) { - SmallVector<SDNode *, 4> FlaggedNodes; - for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - FlaggedNodes.push_back(N); - while (!FlaggedNodes.empty()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { O << DOTGraphTraits<SelectionDAG*> - ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); - FlaggedNodes.pop_back(); - if (!FlaggedNodes.empty()) + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) O << "\n "; } } else { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b74f600cfa2d..691390e2a0e4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <cctype> using namespace llvm; namespace llvm { @@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } - + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to + + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. setOperationAction(ISD::ConstantFP, MVT::f32, Expand); @@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // Default ISD::TRAP to expand (which turns it into abort). 
setOperationAction(ISD::TRAP, MVT::Other, Expand); - + IsLittleEndian = TD->isLittleEndian(); ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; IntDivIsCheap = false; Pow2DivIsCheap = false; + JumpIsExpensive = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { @@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, } NumIntermediates = NumVectorRegs; - + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); if (!TLI->isTypeLegal(NewVT)) NewVT = EltTy; @@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, RegisterVT = DestVT; if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; @@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() { RegisterTypeForVT[MVT::ppcf128] = MVT::f64; TransformToType[MVT::ppcf128] = MVT::f64; ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); - } + } // Decide how to handle f64. If the target does not have native f64 support, // expand it to i64 and we will be generating soft float library calls. @@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() { ValueTypeActions.setTypeAction(MVT::f32, Expand); } } - + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (isTypeLegal(VT)) continue; - + // Determine if there is a legal wider type. If so, we should promote to // that wider vector type. 
EVT EltVT = VT.getVectorElementType(); @@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() { for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { EVT SVT = (MVT::SimpleValueType)nVT; if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeSynthesizable(SVT)) { + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() { } if (IsLegalWiderType) continue; } - + MVT IntermediateVT; EVT RegisterVT; unsigned NumIntermediates; @@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() { getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; - + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. @@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, unsigned &NumIntermediates, EVT &RegisterVT) const { unsigned NumElts = VT.getVectorNumElements(); - + // If there is a wider vector type with the same element type as this one, // we should widen to that legal vector type. This handles things like // <2 x float> -> <4 x float>. @@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } } - + // Figure out the right, legal destination reg to copy into. EVT EltTy = VT.getVectorElementType(); - + unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we // could break down into LHS/RHS like LegalizeDAG does. if (!isPowerOf2_32(NumElts)) { NumVectorRegs = NumElts; NumElts = 1; } - + // Divide the input until we get to a supported size. This will always // end with a scalar if the target doesn't support vectors. while (NumElts > 1 && !isTypeLegal( @@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, } NumIntermediates = NumVectorRegs; - + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; @@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - + // Otherwise, promotion or legal types use the same number of registers as // the vector decimated to the appropriate level. return NumVectorRegs; } -/// Get the EVTs and ArgFlags collections that represent the legalized return +/// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. @@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return MachineJumpTableInfo::EK_BlockAddress; - + // In PIC mode, if the target supports a GPRel32 directive, use it. if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) return MachineJumpTableInfo::EK_GPRel32BlockAddress; - + // Otherwise, use a label difference. 
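The getJumpTableEncoding() hunk above picks an entry kind from the relocation model and the assembler's GPRel32 support. A small sketch of that decision, using hypothetical enums rather than the MachineJumpTableInfo constants:

#include <iostream>

enum class Reloc { Static, PIC };
enum class JTEncoding { BlockAddress, GPRel32BlockAddress, LabelDifference32 };

// Sketch of the default jump-table encoding choice shown above.
JTEncoding chooseJumpTableEncoding(Reloc Model, bool HasGPRel32Directive) {
  if (Model != Reloc::PIC)
    return JTEncoding::BlockAddress;        // absolute addresses are fine
  if (HasGPRel32Directive)
    return JTEncoding::GPRel32BlockAddress; // GP-relative 32-bit entries
  return JTEncoding::LabelDifference32;     // otherwise label differences
}

int main() {
  std::cout << int(chooseJumpTableEncoding(Reloc::PIC, false)) << '\n';   // 2
}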
return MachineJumpTableInfo::EK_LabelDifference32; } @@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { DebugLoc dl = Op.getDebugLoc(); @@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & - C->getAPIntValue(), + C->getAPIntValue(), VT)); return CombineTo(Op, New); } @@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero = KnownOne = APInt(BitWidth, 0); // Other users may use these bits. - if (!Op.getNode()->hasOneUse()) { + if (!Op.getNode()->hasOneUse()) { if (Depth != 0) { - // If not at the root, Just compute the KnownZero/KnownOne bits to + // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); return false; @@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is the root being simplified, allow it to have multiple uses, // just set the NewMask to all bits. NewMask = APInt::getAllOnesValue(BitWidth); - } else if (DemandedMask == 0) { + } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (Op.getOpcode() != ISD::UNDEF) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); @@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the RHS. if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; + // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth+1); + LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) return true; } - + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. 
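The ISD::AND case above drops the mask entirely when the known-zero bits of the other operand line up with the bits the constant clears, restricted to the demanded bits. A tiny standalone rendering of that check (KnownBits here is a hypothetical two-field struct, not LLVM's APInt machinery):

#include <cassert>
#include <cstdint>

// Known facts about a value: bits proven zero and bits proven one.
struct KnownBits { uint32_t Zero = 0, One = 0; };

// Mirrors the check in the hunk above: 'X & C' is redundant when, over the
// demanded bits, X's known-zero bits match the bits that C clears.
bool andIsRedundant(KnownBits X, uint32_t C, uint32_t Demanded) {
  return (X.Zero & Demanded) == (~C & Demanded);
}

int main() {
  KnownBits X;
  X.Zero = 0xFFFF0000;              // X proven to have a clear high half
  // Masking with 0x0000FFFF is a no-op when only the low half is demanded.
  assert(andIsRedundant(X, 0x0000FFFF, 0x0000FFFF));
  // Not redundant if the constant also clears demanded low bits.
  assert(!andIsRedundant(X, 0x000000FF, 0x0000FFFF));
}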
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) @@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= KnownZero2; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) @@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne |= KnownOne2; break; case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if ((KnownZero & NewMask) == NewMask) @@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); - + // Output known-0 bits are known if clear or set in both the LHS & RHS. KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - + // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. @@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownOne & KnownOne2) == KnownOne) { EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } } - + // If the RHS is a constant, see if we can simplify it. // for XOR, we prefer to force bits to 1 if they will make a -1. 
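The OR and XOR cases above combine the operands' known bits with the usual rules: for OR, ones union and zeros intersect; for AND, the reverse; for XOR, a bit is known when both inputs are known. A self-contained sketch of those combination rules, using a small hypothetical KnownBits struct:

#include <cassert>
#include <cstdint>

struct KnownBits { uint32_t Zero, One; };

KnownBits knownAnd(KnownBits A, KnownBits B) {
  return {A.Zero | B.Zero, A.One & B.One};
}
KnownBits knownOr(KnownBits A, KnownBits B) {
  return {A.Zero & B.Zero, A.One | B.One};
}
KnownBits knownXor(KnownBits A, KnownBits B) {
  // Known zero where both inputs agree (both zero or both one); known one
  // where exactly one input is known one and the other known zero.
  return {(A.Zero & B.Zero) | (A.One & B.One),
          (A.Zero & B.One) | (A.One & B.Zero)};
}

int main() {
  KnownBits A{0xFFFF0000u, 0x00000001u};   // high half zero, bit 0 one
  KnownBits B{0x000000FFu, 0x00000100u};   // low byte zero, bit 8 one
  KnownBits X = knownXor(A, B);
  assert((X.Zero & X.One) == 0);           // a bit is never both zero and one
  assert(X.One & 0x00000001);              // bit 0: one ^ zero = one
}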
// if we can't force bits, try to shrink constant @@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne = KnownOneOut; break; case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, KnownOne2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If the operands are constants, see if we can simplify them. if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; - + // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; @@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SRL; - } - - SDValue NewSA = + } + + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); - + // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) break; @@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Diff < 0) { Diff = -Diff; Opc = ISD::SHL; - } - + } + SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } - } - + } + // Compute the new bits that are at the top now. if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); - + // If the shift count is an invalid immediate, don't do anything. 
if (ShAmt >= BitWidth) break; @@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); - + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - + // Handle the sign bit, adjusted to where it is now in the mask. APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); - + // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), Op.getOperand(1))); } else if (KnownOne.intersects(SignBit)) { // New bits are known one. @@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - // Sign extension. Compute the demanded bits in the result that are not + // Sign extension. Compute the demanded bits in the result that are not // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EVT.getScalarType().getSizeInBits()); - + // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits()); - InSignBit.zext(BitWidth); + APInt InSignBit = + APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EVT.getScalarType().getSizeInBits()) & NewMask; - + // Since the sign extended bits are demanded, we know that the sign // bit is demanded. InputDemandedBits |= InSignBit; @@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - + // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) - return TLO.CombineTo(Op, + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); - + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; @@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); - + APInt InMask = NewMask.trunc(OperandBitWidth); + // If none of the top bits are demanded, convert this into an any_extend. 
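The SRA case above rewrites an arithmetic shift into a logical one once the sign bit is known zero (or no high bits are demanded), since the two agree on non-negative inputs and the unsigned form is easier for later combines. A brute-force standalone check of that equivalence:

#include <cassert>
#include <cstdint>

// Once the sign bit is known to be zero, an arithmetic right shift produces
// exactly the same bits as a logical right shift.
int main() {
  for (uint32_t x = 0; x < (1u << 16); ++x) {   // sign bit certainly clear
    for (unsigned sh = 0; sh < 32; ++sh) {
      uint32_t logical = x >> sh;
      uint32_t arith   = uint32_t(int32_t(x) >> sh);
      assert(logical == arith);
    }
  }
  return 0;
}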
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; if (!NewBits.intersects(NewMask)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; break; } @@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); APInt NewBits = ~InMask & NewMask; - + // If none of the top bits are demanded, convert this into an any_extend. if (NewBits == 0) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - + // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. APInt InDemandedBits = InMask & NewMask; InDemandedBits |= InSignBit; - InDemandedBits.trunc(InBits); - - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); - + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + // If the sign bit is known zero, convert this to a zero extend. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + Op.getValueType(), Op.getOperand(0))); - + // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { KnownOne |= NewBits; @@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt InMask = NewMask; - InMask.trunc(OperandBitWidth); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero.zext(BitWidth); - KnownOne.zext(BitWidth); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); - APInt TruncMask = NewMask; - TruncMask.zext(OperandBitWidth); + APInt TruncMask = NewMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - KnownZero.trunc(BitWidth); - KnownOne.trunc(BitWidth); - + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
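Two things happen in the TRUNCATE case above: the demanded mask is zero-extended to the source width before recursing, and the resulting known bits are truncated back; separately, the patch switches APInt::zext/trunc from mutating in place to returning the resized value, hence the new assignments. A small, admittedly contrived sketch of the mask-widening and result-narrowing step using plain integers:

#include <cassert>
#include <cstdint>

struct Known64 { uint64_t Zero = 0, One = 0; };

int main() {
  uint32_t demanded32 = 0x0000FF00;            // caller cares about bits 8..15
  uint64_t demanded64 = uint64_t(demanded32);  // "zext" the mask to 64 bits

  // Suppose analysis of the wide value proved bits 8..11 one and 12..15 zero.
  Known64 wide;
  wide.One  = 0x0000000000000F00ull & demanded64;
  wide.Zero = 0x000000000000F000ull & demanded64;

  // "trunc" the known bits back to the narrow width.
  uint32_t KnownOne32  = uint32_t(wide.One);
  uint32_t KnownZero32 = uint32_t(wide.Zero);
  assert((KnownOne32 & KnownZero32) == 0);
  assert(KnownOne32 == 0x0F00 && KnownZero32 == 0xF000);
}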
if (Op.getOperand(0).getNode()->hasOneUse()) { @@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + Op.getValueType(), In.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - NewTrunc, + NewTrunc, In.getOperand(1))); } break; } } - - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, @@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownZero |= ~InMask & NewMask; break; } - case ISD::BIT_CONVERT: + case ISD::BITCAST: #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. @@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, isOperationLegal(ISD::FGETSIGN, Op.getValueType())) { // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. - SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(), Op.getOperand(0)); unsigned ShVal = Op.getValueType().getSizeInBits()-1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy()); @@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); break; } - + // If we know the value of all of the demanded bits, return this as a // constant. if ((NewMask & (KnownZero|KnownOne)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); - + return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the +/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, +void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { @@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { (KnownOne.countPopulation() == 1); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands +/// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. 
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() > + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. @@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!Lod->isVolatile() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueType().getSizeInBits(); unsigned maskWidth = origWidth; - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to // 8 bits, but have to be careful... if (Lod->getExtensionType() != ISD::NON_EXTLOAD) origWidth = Lod->getMemoryVT().getSizeInBits(); @@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(bestOffset, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, + Lod->getPointerInfo().getWithOffset(bestOffset), false, false, NewAlign); - return DAG.getSetCC(dl, VT, + return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), newVT)), @@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT)==Legal)) return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), newVT), + DAG.getConstant(C1.trunc(InSize), newVT), Cond); break; } @@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) return DAG.getConstant(Cond == ISD::SETNE, VT); - + SDValue ZextOp; EVT Op0Ty = N0.getOperand(0).getValueType(); if (Op0Ty == ExtSrcTy) { @@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); // Otherwise, make this a use of a zext. 
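The new ctpop rewrite above relies on the classic identity that x & (x - 1) clears the lowest set bit, so it is zero exactly when x has at most one bit set. A standalone brute-force check of the two directions used in the hunk:

#include <cassert>
#include <cstdint>

//   (ctpop x) u< 2  <=>  (x & (x-1)) == 0
//   (ctpop x) u> 1  <=>  (x & (x-1)) != 0
static unsigned popcount(uint32_t x) {
  unsigned n = 0;
  for (int i = 0; i < 32; ++i)
    n += (x >> i) & 1;
  return n;
}

int main() {
  for (uint64_t v = 0; v <= 0xFFFF; ++v) {
    uint32_t x = uint32_t(v);
    bool lhs = popcount(x) < 2;
    bool rhs = (x & (x - 1)) == 0;   // also covers x == 0, since 0 & ~0 == 0
    assert(lhs == rhs);
  }
  return 0;
}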
- return DAG.getSetCC(dl, VT, ZextOp, + return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & APInt::getLowBitsSet( ExtDstTyBits, - ExtSrcTyBits), + ExtSrcTyBits), ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && @@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && @@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0.getOpcode() == ISD::XOR) Val = N0.getOperand(0); else { - assert(N0.getOpcode() == ISD::AND && + assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), @@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } - + APInt MinVal, MaxVal; unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { @@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1-1, N1.getValueType()), (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); } @@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETLE || Cond == ISD::SETULE) { if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(C1+1, N1.getValueType()), (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); } @@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If we have setult X, 1, turn it into seteq X, 0 if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(MaxVal, N0.getValueType()), ISD::SETEQ); @@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // by changing cc. 
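Several of the canonicalizations above trade a non-strict comparison for a strict one against an adjusted constant, or collapse comparisons against the extreme values. A quick standalone check of two of them over all unsigned 8-bit values:

#include <cassert>
#include <cstdint>

//   X >= C   is   X > C-1    (valid whenever C is not the minimum value)
//   X u< 1   is   X == 0
int main() {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned c = 1; c < 256; ++c)     // c != MIN, so c-1 does not wrap
      assert((x >= c) == (x > c - 1));
    assert((x < 1u) == (x == 0));
  }
  return 0;
}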
// SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && + if (Cond == ISD::SETUGT && C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, + return DAG.getSetCC(dl, VT, N0, DAG.getConstant(0, N1.getValueType()), ISD::SETLT); @@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getUNDEF(VT); } } - + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the // constant if knowing that the operand is non-nan is enough. We prefer to // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to @@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DAG.isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) - return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), Cond); if (N0.getOperand(1) == N1.getOperand(0)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), Cond); } } - + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LHSR->getAPIntValue(), N0.getValueType()), Cond); } - + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. if (N0.getOpcode() == ISD::XOR) // If we know that all of the inverted bits are zero, don't bother @@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - + // Turn (C1-X) == C2 --> X == C1-C2 if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { @@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getValueType()), Cond); } - } + } } // Simplify (X+Z) == X --> Z == 0 @@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + N1, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, DAG.getConstant(1, getShiftAmountTy())); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); @@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. 
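Two more rewrites from the hunk above are easy to check in isolation: an unsigned compare against the signed maximum is just a sign test, and adding a constant on the left can be folded into the right-hand constant because the arithmetic is modular. A small check, assuming the usual two's-complement conversion behaviour:

#include <cassert>
#include <cstdint>

//   X u> 0x7fffffff   <=>   (signed)X < 0
//   (X + C1) == C2    <=>   X == C2 - C1       (mod 2^32)
int main() {
  uint32_t samples[] = {0u, 1u, 0x7fffffffu, 0x80000000u,
                        0xfffffffeu, 0xffffffffu};
  const uint32_t C1 = 0x12345678u, C2 = 0x9abcdef0u;
  for (uint32_t x : samples) {
    assert((x > 0x7fffffffu) == (int32_t(x) < 0));
    assert((uint32_t(x + C1) == C2) == (x == uint32_t(C2 - C1)));
  }
  return 0;
}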
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, +bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { if (isa<GlobalAddressSDNode>(N)) { GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N); @@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA, } } } + return false; } @@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { return C_Memory; case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant case 's': // Relocatable Constant + case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. case 'J': @@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { case 'N': case 'O': case 'P': + case '<': + case '>': return C_Other; } } - - if (Constraint.size() > 1 && Constraint[0] == '{' && + + if (Constraint.size() > 1 && Constraint[0] == '{' && Constraint[Constraint.size()-1] == '}') return C_Register; return C_Unknown; @@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // is possible and fine if either GV or C are missing. ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); - + // If we have "(add GV, C)", pull out GV/C if (Op.getOpcode() == ISD::ADD) { C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); @@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (C == 0 || GA == 0) C = 0, GA = 0; } - + // If we find a valid operand, map to the TargetXXX version so that the // value itself doesn't get selected. if (GA) { // Either &GV or &GV+C if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), C ? C->getDebugLoc() : DebugLoc(), Op.getValueType(), Offs)); return; @@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; - - // If none of the value types for this register class are valid, we + + // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. bool isLegal = false; for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); @@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint, break; } } - + if (!isLegal) continue; - - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { if (RegName.equals_lower(RI->getName(*I))) return std::make_pair(*I, RC); } } - + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } @@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { } +/// ParseConstraints - Split up the constraint string from the inline +/// assembly value into the specific constraints and their prefixes, +/// and also tie in the associated operand values. +/// If this returns an empty vector, and if the constraint string itself +/// isn't empty, there was an error parsing. 
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( + ImmutableCallSite CS) const { + /// ConstraintOperands - Information about all of the constraints. + AsmOperandInfoVector ConstraintOperands; + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + unsigned maCount = 0; // Largest number of multiple alternative constraints. + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + InlineAsm::ConstraintInfoVector + ConstraintInfos = IA->ParseConstraints(); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + + for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { + ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (const StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getValueType(CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + const llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. + if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = TD->getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (dyn_cast<PointerType>(OpTy)) { + OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize()); + } else { + OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (ConstraintInfos.size()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. 
+ for (maIndex = 0; maIndex < maCount; ++maIndex) { + int weightSum = 0; + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + if (OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is an output operand with a matching input operand, + // look up the matching input. If their types mismatch, e.g. one + // is an integer, the other is floating point, or their sizes are + // different, flag it as an maCantMatch. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + weightSum = -1; // Can't match. + break; + } + } + } + weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); + if (weight == -1) { + weightSum = -1; + break; + } + weightSum += weight; + } + // Update best. + if (weightSum > bestWeight) { + bestWeight = weightSum; + bestMAIndex = maIndex; + } + } + + // Now select chosen alternative in each constraint. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + if (cInfo.Type == InlineAsm::isClobber) + continue; + cInfo.selectAlternative(bestMAIndex); + } + } + } + + // Check and hook up tied operands, choose constraint code to use. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + } + + } + } + + return ConstraintOperands; +} + + /// getConstraintGenerality - Return an integer indicating how general CT /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { @@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { } } +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const { + InlineAsm::ConstraintCodeVector *rCodes; + if (maIndex >= (int)info.multipleAlternatives.size()) + rCodes = &info.Codes; + else + rCodes = &info.multipleAlternatives[maIndex].Codes; + ConstraintWeight BestWeight = CW_Invalid; + + // Loop over the options, keeping track of the most general one. 
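The alternative-selection loop above sums per-operand weights for each multiple-alternative constraint and keeps the alternative with the largest sum, where a single unmatched operand disqualifies the whole alternative. A small standalone sketch of that selection (the weight table here is made up for illustration):

#include <cstdio>
#include <vector>

// Each inner vector holds one operand's weight per alternative; -1 means
// "cannot match". The chosen alternative is the one with the largest sum.
int pickBestAlternative(const std::vector<std::vector<int>> &Weights,
                        unsigned NumAlternatives) {
  int bestIndex = 0, bestSum = -1;
  for (unsigned a = 0; a < NumAlternatives; ++a) {
    int sum = 0;
    for (const auto &PerOperand : Weights) {
      int w = PerOperand[a];
      if (w == -1) { sum = -1; break; }   // this alternative can't match
      sum += w;
    }
    if (sum > bestSum) { bestSum = sum; bestIndex = int(a); }
  }
  return bestIndex;
}

int main() {
  // Two operands, three alternatives; the middle alternative wins (3 + 2).
  std::vector<std::vector<int>> w = {{1, 3, -1}, {1, 2, 5}};
  std::printf("best = %d\n", pickBestAlternative(w, 3));   // prints "best = 1"
}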
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + ConstraintWeight weight = + getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + if (weight > BestWeight) + BestWeight = weight; + } + + return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + case 'i': // immediate integer. + case 'n': // immediate integer with a known value. + if (isa<ConstantInt>(CallOperandVal)) + weight = CW_Constant; + break; + case 's': // non-explicit intregal immediate. + if (isa<GlobalValue>(CallOperandVal)) + weight = CW_Constant; + break; + case 'E': // immediate float if host format. + case 'F': // immediate float. + if (isa<ConstantFP>(CallOperandVal)) + weight = CW_Constant; + break; + case '<': // memory operand with autodecrement. + case '>': // memory operand with autoincrement. + case 'm': // memory operand. + case 'o': // offsettable memory operand + case 'V': // non-offsettable memory operand + weight = CW_Memory; + break; + case 'r': // general register. + case 'g': // general register, memory operand or immediate integer. + // note: Clang converts "g" to "imr". + if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + case 'X': // any operand. + default: + weight = CW_Default; + break; + } + return weight; +} + /// ChooseConstraint - If there are multiple different constraints that we /// could pick for this operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: @@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, break; } } - + // Things with matching constraints can only be registers, per gcc // documentation. This mainly affects "g" constraints. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) continue; - + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, BestGenerality = Generality; } } - + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; OpInfo.ConstraintType = BestType; } @@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// type to use for the specific AsmOperandInfo, setting /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, - SDValue Op, + SDValue Op, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); - + // Single-letter constraints ('r') are very common. if (OpInfo.Codes.size() == 1) { OpInfo.ConstraintCode = OpInfo.Codes[0]; @@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } else { ChooseConstraint(OpInfo, *this, Op, DAG); } - + // 'X' matches anything. 
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing @@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.CallOperandVal = v; return; } - + // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { @@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, +bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. @@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, // Allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; - + // No global is ever allowed as a base. if (AM.BaseGV) return false; - - // Only support r+r, + + // Only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; @@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, // Allow 2*r as r+r. break; } - + return true; } @@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, +SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); - + // Check to see if we can do this. // FIXME: We should be more aggressive here. 
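The default isLegalAddressingMode() above accepts only a conservative r+r / r+i shape: a signed 16-bit offset, no global as the base, and a scale of 0, 1 or 2 (2*r being treated as r+r). A simplified standalone rendering of those checks (it omits the extra rejection of a scaled index combined with both a base register and an offset):

#include <cassert>
#include <cstdint>

struct AddrMode {
  int64_t BaseOffs;
  bool HasBaseGV;
  int64_t Scale;
};

bool defaultIsLegalAddressingMode(const AddrMode &AM) {
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;                        // offset must fit a signed 16-bit field
  if (AM.HasBaseGV)
    return false;                        // no global symbol as a base
  switch (AM.Scale) {
  case 0: case 1: case 2: return true;   // i, r+i, r+r, 2*r
  default: return false;
  }
}

int main() {
  assert(defaultIsLegalAddressingMode({100, false, 1}));
  assert(!defaultIsLegalAddressingMode({1 << 20, false, 1}));   // offset too big
  assert(!defaultIsLegalAddressingMode({0, false, 3}));         // scale 3 rejected
}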
if (!isTypeLegal(VT)) return SDValue(); - + APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); APInt::ms magics = d.magic(); - + // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; @@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, else return SDValue(); // No mulhs or equvialent // If d > 0 and m < 0, add the numerator - if (d.isStrictlyPositive() && magics.m.isNegative()) { + if (d.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); @@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, } // Shift right algebraic if shift value is nonzero if (magics.s > 0) { - Q = DAG.getNode(ISD::SRA, dl, VT, Q, + Q = DAG.getNode(ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy())); if (Created) Created->push_back(Q.getNode()); @@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, if (magics.a == 0) { assert(magics.s < N1C->getAPIntValue().getBitWidth() && "We shouldn't generate an undefined shift!"); - return DAG.getNode(ISD::SRL, dl, VT, Q, + return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy())); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); if (Created) Created->push_back(NPQ.getNode()); - NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, + NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, getShiftAmountTy())); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); if (Created) Created->push_back(NPQ.getNode()); - return DAG.getNode(ISD::SRL, dl, VT, NPQ, + return DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s-1, getShiftAmountTy())); } } diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index aeaa38b56433..7b5bca495206 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -226,7 +226,7 @@ bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); CSRegSet prevAnticIn = AnticIn[MBB]; AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; - if (prevAnticIn |= AnticIn[MBB]) + if (prevAnticIn != AnticIn[MBB]) changed = true; return changed; } @@ -264,7 +264,7 @@ bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); CSRegSet prevAvailOut = AvailOut[MBB]; AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; - if (prevAvailOut |= AvailOut[MBB]) + if (prevAvailOut != AvailOut[MBB]) changed = true; return changed; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index b29ea19835bc..2843c1a5b6d8 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "regcoalescing" #include "SimpleRegisterCoalescing.h" #include "VirtRegMap.h" +#include "LiveDebugVariables.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -64,9 +65,25 @@ DisablePhysicalJoin("disable-physical-join", cl::desc("Avoid coalescing physical register copies"), cl::init(false), cl::Hidden); -INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer, +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + 
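BuildSDIV above replaces a signed division by a constant with a high multiply by a precomputed magic number, optional add/subtract of the numerator, an arithmetic shift, and a final sign fixup. A standalone illustration for the specific divisor 7 at 32 bits (multiplier 0x92492493, shift 2, the standard Hacker's Delight constants); it assumes the usual two's-complement, arithmetic-shift behaviour of mainstream compilers:

#include <cassert>
#include <cstdint>

static int32_t mulhs(int32_t a, int32_t b) {
  return int32_t((int64_t(a) * int64_t(b)) >> 32);   // high 32 bits of product
}

static int32_t sdiv7(int32_t n) {
  int32_t q = mulhs(n, int32_t(0x92492493u));   // Q = mulhs(n, magic)
  q += n;                                       // d > 0 and magic < 0: add numerator
  q >>= 2;                                      // shift right algebraic by s
  q += int32_t(uint32_t(q) >> 31);              // add one when the quotient is negative
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n)
    assert(sdiv7(n) == n / 7);
  return 0;
}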
+INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", - false, false, true); + false, false, true) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true) char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; @@ -75,14 +92,14 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<LiveIntervals>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreservedID(MachineDominatorsID); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); AU.addPreservedID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -124,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->getCopy()) return false; + if (!BValNo->isDefByCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. @@ -218,7 +235,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, continue; LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, - SRLI.getNextValue(FillerStart, 0, true, + SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator()))); } } @@ -266,9 +283,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; - // When BValNo is null, we're looking for a dummy clobber-value for a subreg. - if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy()) - continue; if (BI->start <= AI->start && BI->end > AI->start) return true; if (BI->start > AI->start && BI->start < AI->end) @@ -278,16 +292,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, return false; } -static void -TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { - for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); - i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isImplicit()) - NewMI->addOperand(MO); - } -} - /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with /// IntA being the source and IntB being the dest, thus this defines a value /// number in IntB. 
If the source value number (in IntA) is defined by a @@ -324,8 +328,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!li_->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); LiveInterval &IntA = li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -334,27 +337,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; + VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + if (!BValNo || !BValNo->isDefByCopy()) + return false; - // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we - // can't process it. - if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = - IntA.FindLiveRangeContaining(CopyIdx.getUseIndex()); // + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + assert(AValNo && "COPY source not live"); - assert(ALR != IntA.end() && "Live range not found!"); - VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be - // tested? - if (AValNo->isPHIDef() || !AValNo->isDefAccurate() || - AValNo->isUnused() || AValNo->hasPHIKill()) + // the optimization. + if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -411,7 +406,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, return false; } - DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI); + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. @@ -427,10 +423,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); NewMI->getOperand(OpIdx).setIsKill(); - bool BHasPHIKill = BValNo->hasPHIKill(); - SmallVector<VNInfo*, 4> BDeadValNos; - std::map<SlotIndex, SlotIndex> BExtend; - // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. // A = or A, B // ... @@ -439,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, // C = A<kill> // ... // = B - bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; - if (Extended) - BExtend[ALR->end] = BLR->end; // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), @@ -467,52 +456,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; - if (UseMO.isKill()) { - if (Extended) - UseMO.setIsKill(false); - } if (!UseMI->isCopy()) continue; if (UseMI->getOperand(0).getReg() != IntB.reg || UseMI->getOperand(0).getSubReg()) continue; - // This copy will become a noop. 
If it's defining a new val#, - // remove that val# as well. However this live range is being - // extended to the end of the existing live range defined by the copy. + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. SlotIndex DefIdx = UseIdx.getDefIndex(); - const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - if (!DLR) + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) continue; - BHasPHIKill |= DLR->valno->hasPHIKill(); - assert(DLR->valno->def == DefIdx); - BDeadValNos.push_back(DLR->valno); - BExtend[DLR->start] = DLR->end; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); JoinedCopies.insert(UseMI); } - // We need to insert a new liverange: [ALR.start, LastUse). It may be we can - // simply extend BLR if CopyMI doesn't end the range. - DEBUG({ - dbgs() << "Extending: "; - IntB.print(dbgs(), tri_); - }); - - // Remove val#'s defined by copies that will be coalesced away. - for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { - VNInfo *DeadVNI = BDeadValNos[i]; - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { - for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) { - if (!li_->hasInterval(*AS)) - continue; - LiveInterval &ASLI = li_->getInterval(*AS); - if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def)) - ASLI.removeValNo(ASLR->valno); - } - } - IntB.removeValNo(BDeadValNos[i]); - } - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition // is updated. VNInfo *ValNo = BValNo; @@ -521,30 +482,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - SlotIndex End = AI->end; - std::map<SlotIndex, SlotIndex>::iterator - EI = BExtend.find(End); - if (EI != BExtend.end()) - End = EI->second; - IntB.addRange(LiveRange(AI->start, End, ValNo)); + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); } - ValNo->setHasPHIKill(BHasPHIKill); - - DEBUG({ - dbgs() << " result = "; - IntB.print(dbgs(), tri_); - dbgs() << "\nShortening: "; - IntA.print(dbgs(), tri_); - }); + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); IntA.removeValNo(AValNo); - - DEBUG({ - dbgs() << " result = "; - IntA.print(dbgs(), tri_); - dbgs() << '\n'; - }); - + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); ++numCommutes; return true; } @@ -644,6 +587,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI) { @@ -652,12 +596,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be - // tested? - if (ValNo->isPHIDef() || !ValNo->isDefAccurate() || - ValNo->isUnused() || ValNo->hasPHIKill()) + // the optimization. 
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; assert(DefMI && "Defining instruction disappeared"); const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) @@ -681,8 +625,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - // If destination register has a sub-register index on it, make sure it mtches - // the instruction register class. + // If destination register has a sub-register index on it, make sure it + // matches the instruction register class. if (DstSubIdx) { const TargetInstrDesc &TID = DefMI->getDesc(); if (TID.getNumDefs() != 1) @@ -699,30 +643,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, RemoveCopyFlag(DstReg, CopyMI); - // If copy kills the source register, find the last use and propagate - // kill. - bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (SrcLR->end == CopyIdx.getDefIndex()) - if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { - checkForDeadDef = true; - } - MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); MachineInstr *NewMI = prior(MII); - if (checkForDeadDef) { - // PR4090 fix: Trim interval failed because there was no use of the - // source interval in this MBB. If the def is in this MBB too then we - // should mark it dead: - if (DefMI->getParent() == MBB) { - DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start.getNextSlot(); - } - } - // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. for (unsigned i = CopyMI->getDesc().getNumOperands(), @@ -734,13 +660,18 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, RemoveCopyFlag(MO.getReg(), CopyMI); } - TransferImplicitOps(CopyMI, NewMI); + NewMI->copyImplicitOps(CopyMI); li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; + + // The source interval can become smaller because we removed a use. + if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + return true; } @@ -756,6 +687,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { unsigned DstReg = CP.getDstReg(); unsigned SubIdx = CP.getSubIdx(); + // Update LiveDebugVariables. 
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx); + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { // A PhysReg copy that won't be coalesced can perhaps be rematerialized @@ -768,7 +702,7 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { UseMI->getOperand(0).getReg() != SrcReg && UseMI->getOperand(0).getReg() != DstReg && !JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, UseMI->getOperand(0).getReg(), 0, UseMI)) continue; } @@ -874,7 +808,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, if (li_->hasInterval(DstReg)) { LiveInterval &LI = li_->getInterval(DstReg); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->getCopy() == CopyMI) + if (LR->valno->def == DefIdx) LR->valno->setCopy(0); } if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) @@ -884,7 +818,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, continue; LiveInterval &LI = li_->getInterval(*AS); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) - if (LR->valno->getCopy() == CopyMI) + if (LR->valno->def == DefIdx) LR->valno->setCopy(0); } } @@ -1044,23 +978,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; } - DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); // Only coalesce to allocatable physreg. if (!li_->isAllocatable(CP.getDstReg())) { DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); return false; // Not coalescable. } } else { - DEBUG({ - dbgs() << " with reg%" << CP.getDstReg(); - if (CP.getSubIdx()) - dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx()); - dbgs() << " to " << CP.getNewRC()->getName() << "\n"; - }); + DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << " to " << CP.getNewRC()->getName() << "\n"); // Avoid constraining virtual register regclass too much. if (CP.isCrossClass()) { @@ -1114,7 +1044,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) return true; ++numAborts; @@ -1134,7 +1064,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!CP.isFlipped() && - ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, CP.getDstReg(), 0, CopyMI)) return true; @@ -1317,7 +1247,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? continue; // Never join with a register that has EarlyClobber redefs. 
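The hunks above replace the old null-check on VNI->getCopy() with the new predicate VNInfo::isDefByCopy(). As a rough sketch of that idiom only (assuming the 2011-era llvm/CodeGen/LiveInterval.h API; the helper name is invented for illustration and is not part of this patch), a pass that wants the copy-defined value numbers of an interval can now write:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
using namespace llvm;

// Collect every value number in LI that is defined by a COPY instruction.
static void collectCopyDefinedValues(LiveInterval &LI,
                                     SmallVectorImpl<VNInfo*> &Copies) {
  for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end();
       I != E; ++I) {
    VNInfo *VNI = *I;
    if (VNI->isUnused() || !VNI->isDefByCopy())
      continue;                    // not a copy-defined value, skip it
    Copies.push_back(VNI);         // VNI->getCopy() is the defining COPY
  }
}

This reads as a direct replacement for the null-getCopy() comparisons removed in the hunks above; the predicate simply makes the "defined by copy" question explicit.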
@@ -1341,7 +1271,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy? continue; // Never join with a register that has EarlyClobber redefs. @@ -1495,9 +1425,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector<CopyRec> &TryAgain) { DEBUG(dbgs() << MBB->getName() << ":\n"); - std::vector<CopyRec> VirtCopies; - std::vector<CopyRec> PhysCopies; - std::vector<CopyRec> ImpDefCopies; + SmallVector<CopyRec, 8> VirtCopies; + SmallVector<CopyRec, 8> PhysCopies; + SmallVector<CopyRec, 8> ImpDefCopies; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { MachineInstr *Inst = MII++; @@ -1690,6 +1620,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); li_ = &getAnalysis<LiveIntervals>(); + ldv_ = &getAnalysis<LiveDebugVariables>(); AA = &getAnalysis<AliasAnalysis>(); loopInfo = &getAnalysis<MachineLoopInfo>(); @@ -1697,6 +1628,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { << "********** Function: " << ((Value*)mf_->getFunction())->getName() << '\n'); + if (VerifyCoalescing) + mf_->verify(this, "Before register coalescing"); + for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), E = tri_->regclass_end(); I != E; ++I) allocatableRCRegs_.insert(std::make_pair(*I, @@ -1739,9 +1673,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = false; if (MI->allDefsAreDead()) { - LiveInterval &li = li_->getInterval(SrcReg); - if (!ShortenDeadCopySrcLiveRange(li, MI)) - ShortenDeadCopyLiveRange(li, MI); + if (li_->hasInterval(SrcReg)) { + LiveInterval &li = li_->getInterval(SrcReg); + if (!ShortenDeadCopySrcLiveRange(li, MI)) + ShortenDeadCopyLiveRange(li, MI); + } DoDelete = true; } if (!DoDelete) { @@ -1821,13 +1757,26 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); if (!reg || !li_->hasInterval(reg)) continue; - if (!li_->getInterval(reg).killedAt(DefIdx)) + if (!li_->getInterval(reg).killedAt(DefIdx)) { MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. 
+ if (!TargetRegisterInfo::isPhysicalRegister(reg)) + continue; + for (const unsigned *SR = tri_->getSubRegisters(reg); + unsigned S = *SR; ++SR) + if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx)) + MI->addRegisterDefined(S, tri_); } } } DEBUG(dump()); + DEBUG(ldv_->dump()); + if (VerifyCoalescing) + mf_->verify(this, "After register coalescing"); return true; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 855bdb98b36c..56703dfa2ddd 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -21,7 +21,7 @@ namespace llvm { class SimpleRegisterCoalescing; - class LiveVariables; + class LiveDebugVariables; class TargetRegisterInfo; class TargetInstrInfo; class VirtRegMap; @@ -44,6 +44,7 @@ namespace llvm { const TargetRegisterInfo* tri_; const TargetInstrInfo* tii_; LiveIntervals *li_; + LiveDebugVariables *ldv_; const MachineLoopInfo* loopInfo; AliasAnalysis *AA; @@ -63,7 +64,9 @@ namespace llvm { public: static char ID; // Pass identifcation, replacement for typeid - SimpleRegisterCoalescing() : MachineFunctionPass(ID) {} + SimpleRegisterCoalescing() : MachineFunctionPass(ID) { + initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); + } struct InstrSlots { enum { @@ -140,8 +143,10 @@ namespace llvm { /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. - bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - unsigned DstSubIdx, MachineInstr *CopyMI); + /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. + bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, + unsigned DstReg, unsigned DstSubIdx, + MachineInstr *CopyMI); /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index b637980f885c..13e1454fa5f3 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -21,15 +21,14 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include <set> using namespace llvm; STATISTIC(NumInvokes, "Number of invokes replaced"); @@ -53,6 +52,7 @@ namespace { Constant *SelectorFn; Constant *ExceptionFn; Constant *CallSiteFn; + Constant *DispatchSetupFn; Value *CallSite; public: @@ -116,6 +116,8 @@ bool SjLjEHPass::doInitialization(Module &M) { SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); + DispatchSetupFn + = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); PersonalityFn = 0; return true; @@ -317,8 +319,12 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Unwinds.push_back(UI); } } - // If we don't have any invokes or unwinds, there's nothing to do. 
- if (Unwinds.empty() && Invokes.empty()) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + // If we don't have any invokes, there's nothing to do. + if (Invokes.empty()) return false; // Find the eh.selector.*, eh.exception and alloca calls. // @@ -332,6 +338,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<CallInst*,16> EH_Selectors; SmallVector<CallInst*,16> EH_Exceptions; SmallVector<Instruction*,16> JmpbufUpdatePoints; + // Note: Skip the entry block since there's nothing there that interests // us. eh.selector and eh.exception shouldn't ever be there, and we // want to disregard any allocas that are there. @@ -351,228 +358,231 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } } } + // If we don't have any eh.selector calls, we can't determine the personality // function. Without a personality function, we can't process exceptions. if (!PersonalityFn) return false; - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); + // We have invokes, so we need to add register/unregister calls to get this + // function onto the global unwind stack. + // + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge we + // spill into a stack location, guaranteeing that there is nothing live across + // the unwind edge. This process also splits all critical edges coming out of + // invoke's. + splitLiveRangesAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be added to the global context list. + unsigned Align = 4; // FIXME: Should be a TLI check? + AllocaInst *FunctionContext = + new AllocaInst(FunctionContextTy, 0, Align, + "fcn_context", F.begin()->begin()); + + Value *Idxs[2]; + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + // We need to also keep around a reference to the call_site field + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 1); + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "call_site", + EntryBB->getTerminator()); + + // The exception selector comes back in context->data[1] + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "fc_data", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 1); + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exc_selector_gep", + EntryBB->getTerminator()); + // The exception value comes back in context->data[0] + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exception_gep", + EntryBB->getTerminator()); + + // The result of the eh.selector call will be replaced with a a reference to + // the selector value returned in the function context. We leave the selector + // itself so the EH analysis later can use it. + for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { + CallInst *I = EH_Selectors[i]; + Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); + I->replaceAllUsesWith(SelectorVal); + } - if (!Invokes.empty()) { - // We have invokes, so we need to add register/unregister calls to get - // this function onto the global unwind stack. 
- // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge - // we spill into a stack location, guaranteeing that there is nothing live - // across the unwind edge. This process also splits all critical edges - // coming out of invoke's. - splitLiveRangesAcrossInvokes(Invokes); - - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - const Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a - // a reference to the selector value returned in the function - // context. We leave the selector itself so the EH analysis later - // can use it. - for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - // eh.exception calls are replaced with references to the proper - // location in the context. Unlike eh.selector, the eh.exception - // calls are removed entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure - // the instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - const Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } + // eh.exception calls are replaced with references to the proper location in + // the context. Unlike eh.selector, the eh.exception calls are removed + // entirely. + for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { + CallInst *I = EH_Exceptions[i]; + // Possible for there to be duplicates, so check to make sure the + // instruction hasn't already been removed. 
+ if (!I->getParent()) continue; + Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); + + I->replaceAllUsesWith(Val); + I->eraseFromParent(); + } - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. If we return normally, - // we're not handling an exception and just register the function context - // and continue. - - // Create the dispatch block. The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load in the Catch block, and a switch on its value. By default, - // we go to a block that just does an unwind (which is the correct action - // for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "lsda_gep", + // The entry block changes to have the eh.sjlj.setjmp, with a conditional + // branch to a dispatch block for non-zero returns. If we return normally, + // we're not handling an exception and just register the function context and + // continue. + + // Create the dispatch block. The dispatch block is basically a big switch + // statement that goes to all of the invoke landing pads. + BasicBlock *DispatchBlock = + BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); + + // Add a call to dispatch_setup at the start of the dispatch block. This is + // expanded to any target-specific setup that needs to be done. + Value *SetupArg = + CastInst::Create(Instruction::BitCast, FunctionContext, + Type::getInt8PtrTy(F.getContext()), "", + DispatchBlock); + CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock); + + // Insert a load of the callsite in the dispatch block, and a switch on its + // value. By default, we go to a block that just does an unwind (which is the + // correct action for a standard call). 
+ BasicBlock *UnwindBlock = + BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + + Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, + DispatchBlock); + SwitchInst *DispatchSwitch = + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + DispatchBlock); + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "eh.sjlj.setjmp.cont"); + + // Populate the Function Context + // 1. LSDA address + // 2. Personality function address + // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) + + // LSDA address + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + Idxs[1] = ConstantInt::get(Int32Ty, 3); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *JBufPtr + = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "jbuf_gep", EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, - "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *FramePtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", EntryBB->getTerminator()); + new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + + // Save the stack pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *StackPtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", + EntryBB->getTerminator()); + + Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); + new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + + // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. + Value *SetjmpArg = + CastInst::Create(Instruction::BitCast, JBufPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, + "dispatch", + EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher. + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, DispatchVal, Zero, + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. 
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FunctionContext, "", + ContBlock->getTerminator()); + Register->setDoesNotThrow(); + + // At this point, we are all set up, update the invoke instructions to mark + // their call_site values, and fill in the dispatch switch accordingly. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); + + // Mark call instructions that aren't nounwind as no-action (call_site == + // -1). Skip the entry block, as prior to then, no function context has been + // created for this function and any unexpected exceptions thrown will go + // directly to the caller's context, which is what we want anyway, so no need + // to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore calls to the EH builtins (eh.selector, eh.exception) + Constant *Callee = CI->getCalledFunction(); + if (Callee != SelectorFn && Callee != ExceptionFn + && !CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); + } + } - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "dispatch", - EntryBB->getTerminator()); - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions - // to mark their call_site values, and fill in the dispatch switch - // accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action - // (call_site == -1). Skip the entry block, as prior to then, no function - // context has been created for this function and any unexpected exceptions - // thrown will go directly to the caller's context, which is what we want - // anyway, so no need to do anything here. 
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP - // in the jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } + // Replace all unwinds with a branch to the unwind handler. + // ??? Should this ever happen with sjlj exceptions? + for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindBlock, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { + Instruction *AI = JmpbufUpdatePoints[i]; + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(AI); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); } + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) + CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + return true; } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 1bc148f160bc..6e3fa90e4341 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -41,7 +41,7 @@ namespace { char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", - "Slot index numbering", false, false); + "Slot index numbering", false, false) IndexListEntry* IndexListEntry::getEmptyKeyEntry() { return &*IndexListEntryEmptyKey; @@ -61,7 +61,6 @@ void SlotIndexes::releaseMemory() { mi2iMap.clear(); mbb2IdxMap.clear(); idx2MBBMap.clear(); - terminatorGaps.clear(); clearList(); } @@ -112,13 +111,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { if (mi->isDebugValue()) continue; - if (miItr == mbb->getFirstTerminator()) { - push_back(createEntry(0, index)); - terminatorGaps.insert( - std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += SlotIndex::NUM; - } - // Insert a store index for the instr. 
push_back(createEntry(mi, index)); @@ -135,15 +127,12 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { index += (Slots + 1) * SlotIndex::NUM; } - if (mbb->getFirstTerminator() == mbb->end()) { - push_back(createEntry(0, index)); - terminatorGaps.insert( - std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT))); - index += SlotIndex::NUM; - } + // We insert two blank instructions between basic blocks. + // One to represent live-out registers and one to represent live-ins. + push_back(createEntry(0, index)); + index += SlotIndex::NUM; - // One blank instruction at the end. - push_back(createEntry(0, index)); + push_back(createEntry(0, index)); SlotIndex blockEndIndex(back(), SlotIndex::LOAD); mbb2IdxMap.insert( @@ -169,6 +158,7 @@ void SlotIndexes::renumberIndexes() { // resulting numbering will match what would have been generated by the // pass during the initial numbering of the function if the new instructions // had been present. + DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); functionSize = 0; unsigned index = 0; @@ -179,7 +169,7 @@ void SlotIndexes::renumberIndexes() { curEntry->setIndex(index); if (curEntry->getInstr() == 0) { - // MBB start entry or terminator gap. Just step index by 1. + // MBB start entry. Just step index by 1. index += SlotIndex::NUM; } else { @@ -214,11 +204,10 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { - os << entry().getIndex(); - if (isPHI()) - os << "*"; + if (isValid()) + os << entry().getIndex() << "LudS"[getSlot()]; else - os << "LudS"[getSlot()]; + os << "invalid"; } // Dump a SlotIndex to stderr. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp new file mode 100644 index 000000000000..9c0bf1629a14 --- /dev/null +++ b/lib/CodeGen/SpillPlacement.cpp @@ -0,0 +1,330 @@ +//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the spill code placement analysis. +// +// Each edge bundle corresponds to a node in a Hopfield network. Constraints on +// basic blocks are weighted by the block frequency and added to become the node +// bias. +// +// Transparent basic blocks have the variable live through, but don't care if it +// is spilled or in a register. These blocks become connections in the Hopfield +// network, again weighted by block frequency. +// +// The Hopfield network minimizes (possibly locally) its energy function: +// +// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b ) +// +// The energy function represents the expected spill code execution frequency, +// or the cost of spilling. This is a Lyapunov function which never increases +// when a node is updated. It is guaranteed to converge to a local minimum. 
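As a point of reference for the header comment above, the update step that locally minimizes E is just the sign of a node's bias plus its weighted neighbour sum. A minimal standalone sketch follows, with invented names and plain std containers rather than the Node/LinkVector types defined later in this file (the real Node::update() below refines this by snapping the result to a small dead zone around zero):

#include <utility>
#include <vector>

// One Hopfield step for a single node: Bias is B_n, and each link carries
// (F_b, V_m), the edge weight and the current value of the linked node.
static float hopfieldStep(float Bias,
                          const std::vector<std::pair<float, float> > &Links) {
  float Sum = Bias;
  for (unsigned i = 0, e = Links.size(); i != e; ++i)
    Sum += Links[i].first * Links[i].second;  // F_b * V_m
  return Sum > 0.0f ? 1.0f : -1.0f;           // new V_n = sign(Sum)
}

Because each such step never increases E, repeatedly updating nodes converges to a local minimum, which is what the iterate() routine added further down exploits.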
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spillplacement" +#include "SpillPlacement.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +using namespace llvm; + +char SpillPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) + +char &llvm::SpillPlacementID = SpillPlacement::ID; + +void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<EdgeBundles>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Node - Each edge bundle corresponds to a Hopfield node. +/// +/// The node contains precomputed frequency data that only depends on the CFG, +/// but Bias and Links are computed each time placeSpills is called. +/// +/// The node Value is positive when the variable should be in a register. The +/// value can change when linked nodes change, but convergence is very fast +/// because all weights are positive. +/// +struct SpillPlacement::Node { + /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle. + /// Ideally, these two numbers should be identical, but inaccuracies in the + /// block frequency estimates means that we need to normalize ingoing and + /// outgoing frequencies separately so they are commensurate. + float Frequency[2]; + + /// Bias - Normalized contributions from non-transparent blocks. + /// A bundle connected to a MustSpill block has a huge negative bias, + /// otherwise it is a number in the range [-2;2]. + float Bias; + + /// Value - Output value of this node computed from the Bias and links. + /// This is always in the range [-1;1]. A positive number means the variable + /// should go in a register through this bundle. + float Value; + + typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector; + + /// Links - (Weight, BundleNo) for all transparent blocks connecting to other + /// bundles. The weights are all positive and add up to at most 2, weights + /// from ingoing and outgoing nodes separately add up to a most 1. The weight + /// sum can be less than 2 when the variable is not live into / out of some + /// connected basic blocks. + LinkVector Links; + + /// preferReg - Return true when this node prefers to be in a register. + bool preferReg() const { + // Undecided nodes (Value==0) go on the stack. + return Value > 0; + } + + /// mustSpill - Return True if this node is so biased that it must spill. + bool mustSpill() const { + // Actually, we must spill if Bias < sum(weights). + // It may be worth it to compute the weight sum here? + return Bias < -2.0f; + } + + /// Node - Create a blank Node. + Node() { + Frequency[0] = Frequency[1] = 0; + } + + /// clear - Reset per-query data, but preserve frequencies that only depend on + // the CFG. + void clear() { + Bias = Value = 0; + Links.clear(); + } + + /// addLink - Add a link to bundle b with weight w. + /// out=0 for an ingoing link, and 1 for an outgoing link. 
+ void addLink(unsigned b, float w, bool out) { + // Normalize w relative to all connected blocks from that direction. + w /= Frequency[out]; + + // There can be multiple links to the same bundle, add them up. + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) + if (I->second == b) { + I->first += w; + return; + } + // This must be the first link to b. + Links.push_back(std::make_pair(w, b)); + } + + /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. + void addBias(float w, bool out) { + // Normalize w relative to all connected blocks from that direction. + w /= Frequency[out]; + Bias += w; + } + + /// update - Recompute Value from Bias and Links. Return true when node + /// preference changes. + bool update(const Node nodes[]) { + // Compute the weighted sum of inputs. + float Sum = Bias; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) + Sum += I->first * nodes[I->second].Value; + + // The weighted sum is going to be in the range [-2;2]. Ideally, we should + // simply set Value = sign(Sum), but we will add a dead zone around 0 for + // two reasons: + // 1. It avoids arbitrary bias when all links are 0 as is possible during + // initial iterations. + // 2. It helps tame rounding errors when the links nominally sum to 0. + const float Thres = 1e-4f; + bool Before = preferReg(); + if (Sum < -Thres) + Value = -1; + else if (Sum > Thres) + Value = 1; + else + Value = 0; + return Before != preferReg(); + } +}; + +bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + bundles = &getAnalysis<EdgeBundles>(); + loops = &getAnalysis<MachineLoopInfo>(); + + assert(!nodes && "Leaking node array"); + nodes = new Node[bundles->getNumBundles()]; + + // Compute total ingoing and outgoing block frequencies for all bundles. + for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { + float Freq = getBlockFrequency(I); + unsigned Num = I->getNumber(); + nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq; + } + + // We never change the function. + return false; +} + +void SpillPlacement::releaseMemory() { + delete[] nodes; + nodes = 0; +} + +/// activate - mark node n as active if it wasn't already. +void SpillPlacement::activate(unsigned n) { + if (ActiveNodes->test(n)) + return; + ActiveNodes->set(n); + nodes[n].clear(); +} + + +/// prepareNodes - Compute node biases and weights from a set of constraints. +/// Set a bit in NodeMask for each active node. +void SpillPlacement:: +prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) { + for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(), + E = LiveBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number); + float Freq = getBlockFrequency(MBB); + + // Is this a transparent block? Link ingoing and outgoing bundles. + if (I->Entry == DontCare && I->Exit == DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + unsigned ob = bundles->getBundle(I->Number, 1); + + // Ignore self-loops. + if (ib == ob) + continue; + activate(ib); + activate(ob); + nodes[ib].addLink(ob, Freq, 1); + nodes[ob].addLink(ib, Freq, 0); + continue; + } + + // This block is not transparent, but it can still add bias. + const float Bias[] = { + 0, // DontCare, + 1, // PrefReg, + -1, // PrefSpill + -HUGE_VALF // MustSpill + }; + + // Live-in to block? 
+ if (I->Entry != DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + activate(ib); + nodes[ib].addBias(Freq * Bias[I->Entry], 1); + } + + // Live-out from block? + if (I->Exit != DontCare) { + unsigned ob = bundles->getBundle(I->Number, 1); + activate(ob); + nodes[ob].addBias(Freq * Bias[I->Exit], 0); + } + } +} + +/// iterate - Repeatedly update the Hopfield nodes until stability or the +/// maximum number of iterations is reached. +/// @param Linked - Numbers of linked nodes that need updating. +void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) { + if (Linked.empty()) + return; + + // Run up to 10 iterations. The edge bundle numbering is closely related to + // basic block numbering, so there is a strong tendency towards chains of + // linked nodes with sequential numbers. By scanning the linked nodes + // backwards and forwards, we make it very likely that a single node can + // affect the entire network in a single iteration. That means very fast + // convergence, usually in a single iteration. + for (unsigned iteration = 0; iteration != 10; ++iteration) { + // Scan backwards, skipping the last node which was just updated. + bool Changed = false; + for (SmallVectorImpl<unsigned>::const_reverse_iterator I = + llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { + unsigned n = *I; + bool C = nodes[n].update(nodes); + Changed |= C; + } + if (!Changed) + return; + + // Scan forwards, skipping the first node which was just updated. + Changed = false; + for (SmallVectorImpl<unsigned>::const_iterator I = + llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { + unsigned n = *I; + bool C = nodes[n].update(nodes); + Changed |= C; + } + if (!Changed) + return; + } +} + +bool +SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, + BitVector &RegBundles) { + // Reuse RegBundles as our ActiveNodes vector. + ActiveNodes = &RegBundles; + ActiveNodes->clear(); + ActiveNodes->resize(bundles->getNumBundles()); + + // Compute active nodes, links and biases. + prepareNodes(LiveBlocks); + + // Update all active nodes, and find the ones that are actually linked to + // something so their value may change when iterating. + SmallVector<unsigned, 8> Linked; + for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) + Linked.push_back(n); + } + + // Iterate the network to convergence. + iterate(Linked); + + // Write preferences back to RegBundles. + bool Perfect = true; + for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) + if (!nodes[n].preferReg()) { + RegBundles.reset(n); + Perfect = false; + } + return Perfect; +} + +/// getBlockFrequency - Return our best estimate of the block frequency which is +/// the expected number of block executions per function invocation. +float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) { + // Use the unnormalized spill weight for real block frequencies. 
+ return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB)); +} + diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h new file mode 100644 index 000000000000..ef2d516cdce7 --- /dev/null +++ b/lib/CodeGen/SpillPlacement.h @@ -0,0 +1,108 @@ +//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis computes the optimal spill code placement between basic blocks. +// +// The runOnMachineFunction() method only precomputes some profiling information +// about the CFG. The real work is done by placeSpills() which is called by the +// register allocator. +// +// Given a variable that is live across multiple basic blocks, and given +// constraints on the basic blocks where the variable is live, determine which +// edge bundles should have the variable in a register and which edge bundles +// should have the variable in a stack slot. +// +// The returned bit vector can be used to place optimal spill code at basic +// block entries and exits. Spill code placement inside a basic block is not +// considered. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H +#define LLVM_CODEGEN_SPILLPLACEMENT_H + +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class BitVector; +class EdgeBundles; +class MachineBasicBlock; +class MachineLoopInfo; +template <typename> class SmallVectorImpl; + +class SpillPlacement : public MachineFunctionPass { + struct Node; + const MachineFunction *MF; + const EdgeBundles *bundles; + const MachineLoopInfo *loops; + Node *nodes; + + // Nodes that are active in the current computation. Owned by the placeSpills + // caller. + BitVector *ActiveNodes; + +public: + static char ID; // Pass identification, replacement for typeid. + + SpillPlacement() : MachineFunctionPass(ID), nodes(0) {} + ~SpillPlacement() { releaseMemory(); } + + /// BorderConstraint - A basic block has separate constraints for entry and + /// exit. + enum BorderConstraint { + DontCare, ///< Block doesn't care / variable not live. + PrefReg, ///< Block entry/exit prefers a register. + PrefSpill, ///< Block entry/exit prefers a stack slot. + MustSpill ///< A register is impossible, variable must be spilled. + }; + + /// BlockConstraint - Entry and exit constraints for a basic block. + struct BlockConstraint { + unsigned Number; ///< Basic block number (from MBB::getNumber()). + BorderConstraint Entry : 8; ///< Constraint on block entry. + BorderConstraint Exit : 8; ///< Constraint on block exit. + }; + + /// placeSpills - Compute the optimal spill code placement given the + /// constraints. No MustSpill constraints will be violated, and the smallest + /// possible number of PrefX constraints will be violated, weighted by + /// expected execution frequencies. + /// @param LiveBlocks Constraints for blocks that have the variable live in or + /// live out. DontCare/DontCare means the variable is live + /// through the block. DontCare/X means the variable is live + /// out, but not live in. + /// @param RegBundles Bit vector to receive the edge bundles where the + /// variable should be kept in a register. 
Each bit + /// corresponds to an edge bundle, a set bit means the + /// variable should be kept in a register through the + /// bundle. A clear bit means the variable should be + /// spilled. + /// @return True if a perfect solution was found, allowing the variable to be + /// in a register through all relevant bundles. + bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks, + BitVector &RegBundles); + + /// getBlockFrequency - Return the estimated block execution frequency per + /// function invocation. + float getBlockFrequency(const MachineBasicBlock*); + +private: + virtual bool runOnMachineFunction(MachineFunction&); + virtual void getAnalysisUsage(AnalysisUsage&) const; + virtual void releaseMemory(); + + void activate(unsigned); + void prepareNodes(const SmallVectorImpl<BlockConstraint>&); + void iterate(const SmallVectorImpl<unsigned>&); +}; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 59d5ab33c994..fd385824aff9 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -12,6 +12,7 @@ #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, splitting, inline_ }; + enum SpillerName { trivial, standard, inline_ }; } static cl::opt<SpillerName> @@ -37,7 +38,6 @@ spillerOpt("spiller", cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), clEnumVal(standard, "default spiller"), - clEnumVal(splitting, "splitting spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), cl::init(standard)); @@ -80,7 +80,7 @@ protected: assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); - assert(!li->isStackSlot() && + assert(!TargetRegisterInfo::isStackSlot(li->reg) && "Trying to spill a stack slot."); DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); @@ -144,7 +144,7 @@ protected: vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = - newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); + newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } @@ -158,7 +158,7 @@ protected: vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = - newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); + newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } @@ -182,7 +182,7 @@ public: void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &) { + const SmallVectorImpl<LiveInterval*> &) { // Ignore spillIs - we don't use it. trivialSpillEverywhere(li, newIntervals); } @@ -195,315 +195,42 @@ namespace { /// Falls back on LiveIntervals::addIntervalsForSpills. 
class StandardSpiller : public Spiller { protected: + MachineFunction *mf; LiveIntervals *lis; + LiveStacks *lss; MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : lis(&pass.getAnalysis<LiveIntervals>()), + : mf(&mf), + lis(&pass.getAnalysis<LiveIntervals>()), + lss(&pass.getAnalysis<LiveStacks>()), loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()), vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { + const SmallVectorImpl<LiveInterval*> &spillIs) { std::vector<LiveInterval*> added = lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); newIntervals.insert(newIntervals.end(), added.begin(), added.end()); - } -}; - -} // end anonymous namespace - -namespace { - -/// When a call to spill is placed this spiller will first try to break the -/// interval up into its component values (one new interval per value). -/// If this fails, or if a call is placed to spill a previously split interval -/// then the spiller falls back on the standard spilling mechanism. -class SplittingSpiller : public StandardSpiller { -public: - SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : StandardSpiller(pass, mf, vrm) { - mri = &mf.getRegInfo(); - tii = mf.getTarget().getInstrInfo(); - tri = mf.getTarget().getRegisterInfo(); - } - void spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) { - if (worthTryingToSplit(li)) - tryVNISplit(li); - else - StandardSpiller::spill(li, newIntervals, spillIs); + // Update LiveStacks. + int SS = vrm->getStackSlot(li->reg); + if (SS == VirtRegMap::NO_STACK_SLOT) + return; + const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg); + LiveInterval &SI = lss->getOrCreateInterval(SS, RC); + if (!SI.hasAtLeastOneValue()) + SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); + SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0)); } - -private: - - MachineRegisterInfo *mri; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - DenseSet<LiveInterval*> alreadySplit; - - bool worthTryingToSplit(LiveInterval *li) const { - return (!alreadySplit.count(li) && li->getNumValNums() > 1); - } - - /// Try to break a LiveInterval into its component values. - std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) { - - DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n"); - - std::vector<LiveInterval*> added; - SmallVector<VNInfo*, 4> vnis; - - std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); - - for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(), - vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { - VNInfo *vni = *vniItr; - - // Skip unused VNIs. - if (vni->isUnused()) - continue; - - DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); - LiveInterval *splitInterval = extractVNI(li, vni); - - if (splitInterval != 0) { - DEBUG(dbgs() << *splitInterval << "\n"); - added.push_back(splitInterval); - alreadySplit.insert(splitInterval); - } else { - DEBUG(dbgs() << "0\n"); - } - } - - DEBUG(dbgs() << "Original LI: " << *li << "\n"); - - // If there original interval still contains some live ranges - // add it to added and alreadySplit. - if (!li->empty()) { - added.push_back(li); - alreadySplit.insert(li); - } - - return added; - } - - /// Extract the given value number from the interval. 
- LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { - assert(vni->isDefAccurate() || vni->isPHIDef()); - - // Create a new vreg and live interval, copy VNI ranges over. - const TargetRegisterClass *trc = mri->getRegClass(li->reg); - unsigned newVReg = mri->createVirtualRegister(trc); - vrm->grow(); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); - VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); - - // Start by copying all live ranges in the VN to the new interval. - for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); - rItr != rEnd; ++rItr) { - if (rItr->valno == vni) { - newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI)); - } - } - - // Erase the old VNI & ranges. - li->removeValNo(vni); - - // Collect all current uses of the register belonging to the given VNI. - // We'll use this to rename the register after we've dealt with the def. - std::set<MachineInstr*> uses; - for (MachineRegisterInfo::use_iterator - useItr = mri->use_begin(li->reg), useEnd = mri->use_end(); - useItr != useEnd; ++useItr) { - uses.insert(&*useItr); - } - - // Process the def instruction for this VNI. - if (newVNI->isPHIDef()) { - // Insert a copy at the start of the MBB. The range proceeding the - // copy will be attached to the original LiveInterval. - MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), - 0, false, lis->getVNInfoAllocator()); - phiDefVNI->setIsPHIDef(true); - li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); - LiveRange *oldPHIDefRange = - newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); - - // If the old phi def starts in the middle of the range chop it up. - if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) { - LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end, - oldPHIDefRange->valno); - oldPHIDefRange->end = lis->getMBBStartIdx(defMBB); - newLI->addRange(oldPHIDefRange2); - } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) { - // Otherwise if it's at the start of the range just trim it. - oldPHIDefRange->start = copyIdx.getDefIndex(); - } else { - assert(false && "PHI def range doesn't cover PHI def?"); - } - - newVNI->def = copyIdx.getDefIndex(); - newVNI->setCopy(copyMI); - newVNI->setIsPHIDef(false); // not a PHI def anymore. - newVNI->setIsDefAccurate(true); - } else { - // non-PHI def. Rename the def. If it's two-addr that means renaming the - // use and inserting a new copy too. - MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); - // We'll rename this now, so we can remove it from uses. 
- uses.erase(defInst); - unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg); - bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx), - twoAddrUseIsUndef = false; - - for (unsigned i = 0; i < defInst->getNumOperands(); ++i) { - MachineOperand &mo = defInst->getOperand(i); - if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) { - mo.setReg(newVReg); - if (isTwoAddr && mo.isUse() && mo.isUndef()) - twoAddrUseIsUndef = true; - } - } - - SlotIndex defIdx = lis->getInstructionIndex(defInst); - newVNI->def = defIdx.getDefIndex(); - - if (isTwoAddr && !twoAddrUseIsUndef) { - MachineBasicBlock *defMBB = defInst->getParent(); - MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - LiveRange *origUseRange = - li->getLiveRangeContaining(newVNI->def.getUseIndex()); - origUseRange->end = copyIdx.getDefIndex(); - VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, - true, lis->getVNInfoAllocator()); - LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); - newLI->addRange(copyRange); - } - } - - for (std::set<MachineInstr*>::iterator - usesItr = uses.begin(), usesEnd = uses.end(); - usesItr != usesEnd; ++usesItr) { - MachineInstr *useInst = *usesItr; - SlotIndex useIdx = lis->getInstructionIndex(useInst); - LiveRange *useRange = - newLI->getLiveRangeContaining(useIdx.getUseIndex()); - - // If this use doesn't belong to the new interval skip it. - if (useRange == 0) - continue; - - // This use doesn't belong to the VNI, skip it. - if (useRange->valno != newVNI) - continue; - - // Check if this instr is two address. - unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); - bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); - - // Rename uses (and defs for two-address instrs). - for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { - MachineOperand &mo = useInst->getOperand(i); - if (mo.isReg() && (mo.isUse() || isTwoAddress) && - (mo.getReg() == li->reg)) { - mo.setReg(newVReg); - } - } - - // If this is a two address instruction we've got some extra work to do. - if (isTwoAddress) { - // We modified the def operand, so we need to copy back to the original - // reg. - MachineBasicBlock *useMBB = useInst->getParent(); - MachineBasicBlock::iterator useItr(useInst); - MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(), - tii->get(TargetOpcode::COPY), newVReg) - .addReg(li->reg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - - // Change the old two-address defined range & vni to start at - // (and be defined by) the copy. - LiveRange *origDefRange = - li->getLiveRangeContaining(useIdx.getDefIndex()); - origDefRange->start = copyIdx.getDefIndex(); - origDefRange->valno->def = copyIdx.getDefIndex(); - origDefRange->valno->setCopy(copyMI); - - // Insert a new range & vni for the two-address-to-copy value. This - // will be attached to the new live interval. - VNInfo *copyVNI = - newLI->getNextValue(useIdx.getDefIndex(), 0, true, - lis->getVNInfoAllocator()); - LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI); - newLI->addRange(copyRange); - } - } - - // Iterate over any PHI kills - we'll need to insert new copies for them. 
- for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end(); - LRI != LRE; ++LRI) { - if (LRI->valno != newVNI || LRI->end.isPHI()) - continue; - SlotIndex killIdx = LRI->end; - MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); - MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(), - DebugLoc(), tii->get(TargetOpcode::COPY), - li->reg) - .addReg(newVReg, RegState::Kill); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - - // Save the current end. We may need it to add a new range if the - // current range runs of the end of the MBB. - SlotIndex newKillRangeEnd = LRI->end; - LRI->end = copyIdx.getDefIndex(); - - if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) { - assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) && - "PHI kill range doesn't reach kill-block end. Not sane."); - newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), - newKillRangeEnd, newVNI)); - } - - VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), - copyMI, true, - lis->getVNInfoAllocator()); - newKillVNI->setHasPHIKill(true); - li->addRange(LiveRange(copyIdx.getDefIndex(), - lis->getMBBEndIdx(killMBB), - newKillVNI)); - } - newVNI->setHasPHIKill(false); - - return newLI; - } - }; } // end anonymous namespace - -namespace llvm { -Spiller *createInlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm); -} - llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { @@ -511,7 +238,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); case standard: return new StandardSpiller(pass, mf, vrm); - case splitting: return new SplittingSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 59bc0ec6ae70..f017583494ed 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -10,14 +10,13 @@ #ifndef LLVM_CODEGEN_SPILLER_H #define LLVM_CODEGEN_SPILLER_H -#include "llvm/ADT/SmallVector.h" - namespace llvm { class LiveInterval; class MachineFunction; class MachineFunctionPass; class SlotIndex; + template <typename T> class SmallVectorImpl; class VirtRegMap; /// Spiller interface. @@ -37,7 +36,7 @@ namespace llvm { /// @param newIntervals The newly created intervals will be appended here. virtual void spill(LiveInterval *li, SmallVectorImpl<LiveInterval*> &newIntervals, - SmallVectorImpl<LiveInterval*> &spillIs) = 0; + const SmallVectorImpl<LiveInterval*> &spillIs) = 0; }; @@ -45,6 +44,13 @@ namespace llvm { Spiller* createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm); + + /// Create and return a spiller that will insert spill code directly instead + /// of deferring though VirtRegMap. 
+ Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); + } #endif diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 29474f0d5512..5663936bf3aa 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "splitter" +#define DEBUG_TYPE "regalloc" #include "SplitKit.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -36,371 +37,231 @@ AllowSplit("spiller-splits-edges", // Split Analysis //===----------------------------------------------------------------------===// -SplitAnalysis::SplitAnalysis(const MachineFunction &mf, +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli) - : mf_(mf), - lis_(lis), - loops_(mli), - tii_(*mf.getTarget().getInstrInfo()), - curli_(0) {} + : MF(vrm.getMachineFunction()), + VRM(vrm), + LIS(lis), + Loops(mli), + TII(*MF.getTarget().getInstrInfo()), + CurLI(0) {} void SplitAnalysis::clear() { - usingInstrs_.clear(); - usingBlocks_.clear(); - usingLoops_.clear(); - curli_ = 0; + UseSlots.clear(); + UsingInstrs.clear(); + UsingBlocks.clear(); + LiveBlocks.clear(); + CurLI = 0; } bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { MachineBasicBlock *T, *F; SmallVector<MachineOperand, 4> Cond; - return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); + return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond); } -/// analyzeUses - Count instructions, basic blocks, and loops using curli. +/// analyzeUses - Count instructions, basic blocks, and loops using CurLI. void SplitAnalysis::analyzeUses() { - const MachineRegisterInfo &MRI = mf_.getRegInfo(); - for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg); - MachineInstr *MI = I.skipInstruction();) { - if (MI->isDebugValue() || !usingInstrs_.insert(MI)) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg), + E = MRI.reg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + if (MO.isUse() && MO.isUndef()) continue; - MachineBasicBlock *MBB = MI->getParent(); - if (usingBlocks_[MBB]++) + MachineInstr *MI = MO.getParent(); + if (MI->isDebugValue() || !UsingInstrs.insert(MI)) continue; - if (MachineLoop *Loop = loops_.getLoopFor(MBB)) - usingLoops_[Loop]++; + UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex()); + MachineBasicBlock *MBB = MI->getParent(); + UsingBlocks[MBB]++; } + array_pod_sort(UseSlots.begin(), UseSlots.end()); + calcLiveBlockInfo(); DEBUG(dbgs() << " counted " - << usingInstrs_.size() << " instrs, " - << usingBlocks_.size() << " blocks, " - << usingLoops_.size() << " loops.\n"); + << UsingInstrs.size() << " instrs, " + << UsingBlocks.size() << " blocks.\n"); } -/// removeUse - Update statistics by noting that MI no longer uses curli. -void SplitAnalysis::removeUse(const MachineInstr *MI) { - if (!usingInstrs_.erase(MI)) +/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks +/// where CurLI is live. 
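The calcLiveBlockInfo routine that follows summarizes, for every basic block the current interval touches, whether the value is live-in and live-out and where its first and last uses fall. A compilable sketch of that per-block summary under simplified assumptions (plain unsigned indices, a sorted segment list, and illustrative names rather than the real BlockInfo/SlotIndex types):

#include <algorithm>
#include <vector>

struct Segment { unsigned start, end; };   // half-open [start, end)

struct BlockSummary {
  bool LiveIn = false, LiveOut = false;
  bool HasUses = false;
  unsigned FirstUse = 0, LastUse = 0;
};

BlockSummary summarizeBlock(unsigned Start, unsigned Stop,
                            const std::vector<Segment> &Live,
                            const std::vector<unsigned> &Uses) {
  BlockSummary S;
  for (const Segment &Seg : Live) {
    if (Seg.end <= Start || Seg.start >= Stop)
      continue;                               // segment does not touch block
    if (Seg.start <= Start) S.LiveIn = true;  // live across the block entry
    if (Seg.end >= Stop)    S.LiveOut = true; // live across the block exit
  }
  auto Lo = std::lower_bound(Uses.begin(), Uses.end(), Start);
  auto Hi = std::lower_bound(Uses.begin(), Uses.end(), Stop);
  if (Lo != Hi) {
    S.HasUses = true;
    S.FirstUse = *Lo;          // first use inside [Start, Stop)
    S.LastUse = *(Hi - 1);     // last use inside [Start, Stop)
  }
  return S;
}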
+void SplitAnalysis::calcLiveBlockInfo() { + if (CurLI->empty()) return; - // Decrement MBB count. - const MachineBasicBlock *MBB = MI->getParent(); - BlockCountMap::iterator bi = usingBlocks_.find(MBB); - assert(bi != usingBlocks_.end() && "MBB missing"); - assert(bi->second && "0 count in map"); - if (--bi->second) - return; - // No more uses in MBB. - usingBlocks_.erase(bi); + LiveInterval::const_iterator LVI = CurLI->begin(); + LiveInterval::const_iterator LVE = CurLI->end(); + + SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE; + UseI = UseSlots.begin(); + UseE = UseSlots.end(); + + // Loop over basic blocks where CurLI is live. + MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + for (;;) { + BlockInfo BI; + BI.MBB = MFI; + SlotIndex Start, Stop; + tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + // The last split point is the latest possible insertion point that dominates + // all successor blocks. If interference reaches LastSplitPoint, it is not + // possible to insert a split or reload that makes CurLI live in the + // outgoing bundle. + MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); + if (LSP == BI.MBB->end()) + BI.LastSplitPoint = Stop; + else + BI.LastSplitPoint = LIS.getInstructionIndex(LSP); + + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + if (!BI.LiveIn) + BI.Def = LVI->start; + + // Find the first and last uses in the block. + BI.Uses = hasUses(MFI); + if (BI.Uses && UseI != UseE) { + BI.FirstUse = *UseI; + assert(BI.FirstUse >= Start); + do ++UseI; + while (UseI != UseE && *UseI < Stop); + BI.LastUse = UseI[-1]; + assert(BI.LastUse < Stop); + } - // Decrement loop count. - MachineLoop *Loop = loops_.getLoopFor(MBB); - if (!Loop) - return; - LoopCountMap::iterator li = usingLoops_.find(Loop); - assert(li != usingLoops_.end() && "Loop missing"); - assert(li->second && "0 count in map"); - if (--li->second) - return; - // No more blocks in Loop. - usingLoops_.erase(li); -} + // Look for gaps in the live range. + bool hasGap = false; + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.Kill = LastStop; + BI.LiveOut = false; + break; + } + if (LastStop < LVI->start) { + hasGap = true; + BI.Kill = LastStop; + BI.Def = LVI->start; + } + } -// Get three sets of basic blocks surrounding a loop: Blocks inside the loop, -// predecessor blocks, and exit blocks. -void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) { - Blocks.clear(); - - // Blocks in the loop. - Blocks.Loop.insert(Loop->block_begin(), Loop->block_end()); - - // Predecessor blocks. - const MachineBasicBlock *Header = Loop->getHeader(); - for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(), - E = Header->pred_end(); I != E; ++I) - if (!Blocks.Loop.count(*I)) - Blocks.Preds.insert(*I); - - // Exit blocks. - for (MachineLoop::block_iterator I = Loop->block_begin(), - E = Loop->block_end(); I != E; ++I) { - const MachineBasicBlock *MBB = *I; - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if (!Blocks.Loop.count(*SI)) - Blocks.Exits.insert(*SI); - } -} + // Don't set LiveThrough when the block has a gap. + BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; + LiveBlocks.push_back(BI); -/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in -/// and around the Loop. 
-SplitAnalysis::LoopPeripheralUse SplitAnalysis:: -analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) { - LoopPeripheralUse use = ContainedInLoop; - for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); - I != E; ++I) { - const MachineBasicBlock *MBB = I->first; - // Is this a peripheral block? - if (use < MultiPeripheral && - (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) { - if (I->second > 1) use = MultiPeripheral; - else use = SinglePeripheral; - continue; - } - // Is it a loop block? - if (Blocks.Loop.count(MBB)) - continue; - // It must be an unrelated block. - return OutsideLoop; - } - return use; -} + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; -/// getCriticalExits - It may be necessary to partially break critical edges -/// leaving the loop if an exit block has phi uses of curli. Collect the exit -/// blocks that need special treatment into CriticalExits. -void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, - BlockPtrSet &CriticalExits) { - CriticalExits.clear(); - - // A critical exit block contains a phi def of curli, and has a predecessor - // that is not in the loop nor a loop predecessor. - // For such an exit block, the edges carrying the new variable must be moved - // to a new pre-exit block. - for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end(); - I != E; ++I) { - const MachineBasicBlock *Succ = *I; - SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ); - VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx); - // This exit may not have curli live in at all. No need to split. - if (!SuccVNI) - continue; - // If this is not a PHI def, it is either using a value from before the - // loop, or a value defined inside the loop. Both are safe. - if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx) - continue; - // This exit block does have a PHI. Does it also have a predecessor that is - // not a loop block or loop predecessor? - for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), - PE = Succ->pred_end(); PI != PE; ++PI) { - const MachineBasicBlock *Pred = *PI; - if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred)) - continue; - // This is a critical exit block, and we need to split the exit edge. - CriticalExits.insert(Succ); + // Live segment ends exactly at Stop. Move to the next segment. + if (LVI->end == Stop && ++LVI == LVE) break; - } + + // Pick the next basic block. + if (LVI->start < Stop) + ++MFI; + else + MFI = LIS.getMBBFromIndex(LVI->start); } } -/// canSplitCriticalExits - Return true if it is possible to insert new exit -/// blocks before the blocks in CriticalExits. -bool -SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks, - BlockPtrSet &CriticalExits) { - // If we don't allow critical edge splitting, require no critical exits. - if (!AllowSplit) - return CriticalExits.empty(); - - for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end(); - I != E; ++I) { - const MachineBasicBlock *Succ = *I; - // We want to insert a new pre-exit MBB before Succ, and change all the - // in-loop blocks to branch to the pre-exit instead of Succ. - // Check that all the in-loop predecessors can be changed. - for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(), - PE = Succ->pred_end(); PI != PE; ++PI) { - const MachineBasicBlock *Pred = *PI; - // The external predecessors won't be altered. 
- if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred)) - continue; - if (!canAnalyzeBranch(Pred)) - return false; - } - - // If Succ's layout predecessor falls through, that too must be analyzable. - // We need to insert the pre-exit block in the gap. - MachineFunction::const_iterator MFI = Succ; - if (MFI == mf_.begin()) - continue; - if (!canAnalyzeBranch(--MFI)) - return false; +void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { + for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { + unsigned count = UsingBlocks.lookup(*I); + OS << " BB#" << (*I)->getNumber(); + if (count) + OS << '(' << count << ')'; } - // No problems found. - return true; } void SplitAnalysis::analyze(const LiveInterval *li) { clear(); - curli_ = li; + CurLI = li; analyzeUses(); } -const MachineLoop *SplitAnalysis::getBestSplitLoop() { - assert(curli_ && "Call analyze() before getBestSplitLoop"); - if (usingLoops_.empty()) - return 0; - - LoopPtrSet Loops, SecondLoops; - LoopBlocks Blocks; - BlockPtrSet CriticalExits; - - // Find first-class and second class candidate loops. - // We prefer to split around loops where curli is used outside the periphery. - for (LoopCountMap::const_iterator I = usingLoops_.begin(), - E = usingLoops_.end(); I != E; ++I) { - const MachineLoop *Loop = I->first; - getLoopBlocks(Loop, Blocks); - - // FIXME: We need an SSA updater to properly handle multiple exit blocks. - if (Blocks.Exits.size() > 1) { - DEBUG(dbgs() << " multiple exits from " << *Loop); - continue; - } - - LoopPtrSet *LPS = 0; - switch(analyzeLoopPeripheralUse(Blocks)) { - case OutsideLoop: - LPS = &Loops; - break; - case MultiPeripheral: - LPS = &SecondLoops; - break; - case ContainedInLoop: - DEBUG(dbgs() << " contained in " << *Loop); - continue; - case SinglePeripheral: - DEBUG(dbgs() << " single peripheral use in " << *Loop); - continue; - } - // Will it be possible to split around this loop? - getCriticalExits(Blocks, CriticalExits); - DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from " - << *Loop); - if (!canSplitCriticalExits(Blocks, CriticalExits)) - continue; - // This is a possible split. - assert(LPS); - LPS->insert(Loop); - } - - DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + " - << SecondLoops.size() << " candidate loops.\n"); - - // If there are no first class loops available, look at second class loops. - if (Loops.empty()) - Loops = SecondLoops; - if (Loops.empty()) - return 0; +//===----------------------------------------------------------------------===// +// LiveIntervalMap +//===----------------------------------------------------------------------===// - // Pick the earliest loop. - // FIXME: Are there other heuristics to consider? - const MachineLoop *Best = 0; - SlotIndex BestIdx; - for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E; - ++I) { - SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader()); - if (!Best || Idx < BestIdx) - Best = *I, BestIdx = Idx; - } - DEBUG(dbgs() << " getBestSplitLoop found " << *Best); - return Best; +// Work around the fact that the std::pair constructors are broken for pointer +// pairs in some implementations. makeVV(x, 0) works. +static inline std::pair<const VNInfo*, VNInfo*> +makeVV(const VNInfo *a, VNInfo *b) { + return std::make_pair(a, b); } -/// getMultiUseBlocks - if curli has more than one use in a basic block, it -/// may be an advantage to split curli for the duration of the block. 
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { - // If curli is local to one block, there is no point to splitting it. - if (usingBlocks_.size() <= 1) - return false; - // Add blocks with multiple uses. - for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end(); - I != E; ++I) - switch (I->second) { - case 0: - case 1: - continue; - case 2: { - // It doesn't pay to split a 2-instr block if it redefines curli. - VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first)); - VNInfo *VN2 = - curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex()); - // live-in and live-out with a different value. - if (VN1 && VN2 && VN1 != VN2) - continue; - } // Fall through. - default: - Blocks.insert(I->first); - } - return !Blocks.empty(); +void LiveIntervalMap::reset(LiveInterval *li) { + LI = li; + Values.clear(); + LiveOutCache.clear(); } -//===----------------------------------------------------------------------===// -// LiveIntervalMap -//===----------------------------------------------------------------------===// +bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const { + ValueMap::const_iterator i = Values.find(ParentVNI); + return i != Values.end() && i->second == 0; +} -// defValue - Introduce a li_ def for ParentVNI that could be later than +// defValue - Introduce a LI def for ParentVNI that could be later than // ParentVNI->def. VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { + assert(LI && "call reset first"); assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); - - // Is this a simple 1-1 mapping? Not likely. - if (Idx == ParentVNI->def) - return mapValue(ParentVNI, Idx); - - // This is a complex def. Mark with a NULL in valueMap. - VNInfo *OldVNI = - valueMap_.insert( - ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second; - // The static_cast<VNInfo *> is only needed to work around a bug in an - // old version of the C++0x standard which the following compilers - // implemented and have yet to fix: - // - // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel - // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 - // - // If/When we move to C++0x, this can be replaced by nullptr. - (void)OldVNI; - assert(OldVNI == 0 && "Simple/Complex values mixed"); - - // Should we insert a minimal snippet of VNI LiveRange, or can we count on - // callers to do that? We need it for lookups of complex values. - VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator()); + assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + + // Create a new value. + VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + + // Preserve the PHIDef bit. + if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) + VNI->setIsPHIDef(true); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator,bool> InsP = + Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0)); + + // This is now a complex def. Mark with a NULL in valueMap. + if (!InsP.second) + InsP.first->second = 0; + return VNI; } + // mapValue - Find the mapped value for ParentVNI at Idx. // Potentially create phi-def values. 
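Before the mapValue implementation below, it is worth spelling out the map idiom that defValue above relies on: a single map::insert both finds an existing entry and creates a missing one, and a null mapped pointer is the sentinel for "complex mapping, liveness must be recomputed per block". A standalone sketch of that idiom (ParentVal, ChildVal, and the helper names are hypothetical stand-ins):

#include <map>

struct ParentVal { int id; };
struct ChildVal  { int id; };

using ValueMap = std::map<const ParentVal *, ChildVal *>;

// Record a new definition for PV. The first def may remain a simple 1-1
// mapping; any later def downgrades the entry to "complex" (null).
void recordDef(ValueMap &Values, const ParentVal *PV, ChildVal *CV,
               bool IsSameDefSlot) {
  auto InsP = Values.insert({PV, IsSameDefSlot ? CV : nullptr});
  if (!InsP.second)            // already seen: more than one def now exists
    InsP.first->second = nullptr;
}

bool isComplexMapped(const ValueMap &Values, const ParentVal *PV) {
  auto I = Values.find(PV);
  return I != Values.end() && I->second == nullptr;
}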
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { +VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, + bool *simple) { + assert(LI && "call reset first"); assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); // Use insert for lookup, so we can add missing values with a second lookup. std::pair<ValueMap::iterator,bool> InsP = - valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))); - // The static_cast<VNInfo *> is only needed to work around a bug in an - // old version of the C++0x standard which the following compilers - // implemented and have yet to fix: - // - // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel - // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 - // - // If/When we move to C++0x, this can be replaced by nullptr. + Values.insert(makeVV(ParentVNI, 0)); // This was an unknown value. Create a simple mapping. - if (InsP.second) - return InsP.first->second = li_.createValueCopy(ParentVNI, - lis_.getVNInfoAllocator()); + if (InsP.second) { + if (simple) *simple = true; + return InsP.first->second = LI->createValueCopy(ParentVNI, + LIS.getVNInfoAllocator()); + } + // This was a simple mapped value. - if (InsP.first->second) + if (InsP.first->second) { + if (simple) *simple = true; return InsP.first->second; + } // This is a complex mapped value. There may be multiple defs, and we may need // to create phi-defs. - MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx); + if (simple) *simple = false; + MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); assert(IdxMBB && "No MBB at Idx"); // Is there a def in the same MBB we can extend? @@ -409,157 +270,260 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) { // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. - // Perform a depth-first search for predecessor blocks where we know the - // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. - - // Track MBBs where we have created or learned the dominating value. - // This may change during the DFS as we create new phi-defs. - typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap; - MBBValueMap DomValue; - - for (idf_iterator<MachineBasicBlock*> - IDFI = idf_begin(IdxMBB), - IDFE = idf_end(IdxMBB); IDFI != IDFE;) { - MachineBasicBlock *MBB = *IDFI; - SlotIndex End = lis_.getMBBEndIdx(MBB); - - // We are operating on the restricted CFG where ParentVNI is live. - if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) { - IDFI.skipChildren(); - continue; - } - - // Do we have a dominating value in this block? - VNInfo *VNI = extendTo(MBB, End); - if (!VNI) { - ++IDFI; - continue; + // Perform a search for all predecessor blocks where we know the dominating + // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. + DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber() + << " at " << Idx << " in " << *LI << '\n'); + + // Blocks where LI should be live-in. + SmallVector<MachineDomTreeNode*, 16> LiveIn; + LiveIn.push_back(MDT[IdxMBB]); + + // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. 
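The comment above describes a backwards reachability search: starting from the block that needs the value, walk predecessor edges with a worklist, using the live-out cache as the visited set, and queue every block that does not provide a value of its own so it can later receive a live-in (possibly phi-def) value. A standalone model of that search, with illustrative types in place of MachineBasicBlock and the real cache:

#include <map>
#include <vector>

struct Block {
  int id;
  std::vector<Block *> preds;
  bool definesValue = false;   // stands in for "extendTo found a live-out def"
};

std::vector<Block *> collectLiveInBlocks(Block *UseBlock) {
  std::map<Block *, bool> LiveOutCache;   // visited set: block -> has value
  std::vector<Block *> LiveIn;            // worklist of blocks needing live-in
  LiveIn.push_back(UseBlock);

  for (unsigned i = 0; i != LiveIn.size(); ++i) {   // BFS over the worklist
    for (Block *Pred : LiveIn[i]->preds) {
      auto InsP = LiveOutCache.insert({Pred, false});
      if (!InsP.second)
        continue;                         // already visited
      if (Pred->definesValue) {
        InsP.first->second = true;        // Pred provides a live-out value
        continue;
      }
      if (Pred != UseBlock)
        LiveIn.push_back(Pred);           // Pred also needs a live-in value
    }
  }
  return LiveIn;
}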
+ for (unsigned i = 0; i != LiveIn.size(); ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + // Is this a known live-out block? + std::pair<LiveOutMap::iterator,bool> LOIP = + LiveOutCache.insert(std::make_pair(Pred, LiveOutPair())); + // Yes, we have been here before. + if (!LOIP.second) { + DEBUG(if (VNInfo *VNI = LOIP.first->second.first) + dbgs() << " known valno #" << VNI->id + << " at BB#" << Pred->getNumber() << '\n'); + continue; + } + + // Does Pred provide a live-out value? + SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot(); + if (VNInfo *VNI = extendTo(Pred, Last)) { + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def); + DEBUG(dbgs() << " found valno #" << VNI->id + << " from BB#" << DefMBB->getNumber() + << " at BB#" << Pred->getNumber() << '\n'); + LiveOutPair &LOP = LOIP.first->second; + LOP.first = VNI; + LOP.second = MDT[DefMBB]; + continue; + } + // No, we need a live-in value for Pred as well + if (Pred != IdxMBB) + LiveIn.push_back(MDT[Pred]); } + } - // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs - // as needed along the way. - for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) { - // Start from MBB's immediate successor. End at IdxMBB. - MachineBasicBlock *Succ = IDFI.getPath(PI-1); - std::pair<MBBValueMap::iterator, bool> InsP = - DomValue.insert(MBBValueMap::value_type(Succ, VNI)); - - // This is the first time we backtrack to Succ. - if (InsP.second) - continue; - - // We reached Succ again with the same VNI. Nothing is going to change. - VNInfo *OVNI = InsP.first->second; - if (OVNI == VNI) - break; + // We may need to add phi-def values to preserve the SSA form. + // This is essentially the same iterative algorithm that SSAUpdater uses, + // except we already have a dominator tree, so we don't have to recompute it. + VNInfo *IdxVNI = 0; + unsigned Changes; + do { + Changes = 0; + DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n"); + // Propagate live-out values down the dominator tree, inserting phi-defs when + // necessary. Since LiveIn was created by a BFS, going backwards makes it more + // likely for us to visit immediate dominators before their children. + for (unsigned i = LiveIn.size(); i; --i) { + MachineDomTreeNode *Node = LiveIn[i-1]; + MachineBasicBlock *MBB = Node->getBlock(); + MachineDomTreeNode *IDom = Node->getIDom(); + LiveOutPair IDomValue; + // We need a live-in value to a block with no immediate dominator? + // This is probably an unreachable block that has survived somehow. + bool needPHI = !IDom; + + // Get the IDom live-out value. + if (!needPHI) { + LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock()); + if (I != LiveOutCache.end()) + IDomValue = I->second; + else + // If IDom is outside our set of live-out blocks, there must be new + // defs, and we need a phi-def here. + needPHI = true; + } - // Succ already has a phi-def. No need to continue. - SlotIndex Start = lis_.getMBBStartIdx(Succ); - if (OVNI->def == Start) - break; + // IDom dominates all of our predecessors, but it may not be the immediate + // dominator. Check if any of them have live-out values that are properly + // dominated by IDom. If so, we need a phi-def here. 
+ if (!needPHI) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + LiveOutPair Value = LiveOutCache[*PI]; + if (!Value.first || Value.first == IDomValue.first) + continue; + // This predecessor is carrying something other than IDomValue. + // It could be because IDomValue hasn't propagated yet, or it could be + // because MBB is in the dominance frontier of that value. + if (MDT.dominates(IDom, Value.second)) { + needPHI = true; + break; + } + } + } - // We have a collision between the old and new VNI at Succ. That means - // neither dominates and we need a new phi-def. - VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - InsP.first->second = VNI; - - // Replace OVNI with VNI in the remaining path. - for (; PI > 1 ; --PI) { - MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2)); - if (I == DomValue.end() || I->second != OVNI) - break; - I->second = VNI; + // Create a phi-def if required. + if (needPHI) { + ++Changes; + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + DEBUG(dbgs() << " - BB#" << MBB->getNumber() + << " phi-def #" << VNI->id << " at " << Start << '\n'); + // We no longer need LI to be live-in. + LiveIn.erase(LiveIn.begin()+(i-1)); + // Blocks in LiveIn are either IdxMBB, or have a value live-through. + if (MBB == IdxMBB) + IdxVNI = VNI; + // Check if we need to update live-out info. + LiveOutMap::iterator I = LiveOutCache.find(MBB); + if (I == LiveOutCache.end() || I->second.second == Node) { + // We already have a live-out defined in MBB, so this must be IdxMBB. + assert(MBB == IdxMBB && "Adding phi-def to known live-out"); + LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); + } else { + // This phi-def is also live-out, so color the whole block. + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + I->second = LiveOutPair(VNI, Node); + } + } else if (IDomValue.first) { + // No phi-def here. Remember incoming value for IdxMBB. + if (MBB == IdxMBB) + IdxVNI = IDomValue.first; + // Propagate IDomValue if needed: + // MBB is live-out and doesn't define its own value. + LiveOutMap::iterator I = LiveOutCache.find(MBB); + if (I != LiveOutCache.end() && I->second.second != Node && + I->second.first != IDomValue.first) { + ++Changes; + I->second = IDomValue; + DEBUG(dbgs() << " - BB#" << MBB->getNumber() + << " idom valno #" << IDomValue.first->id + << " from BB#" << IDom->getBlock()->getNumber() << '\n'); + } } } + DEBUG(dbgs() << " - made " << Changes << " changes.\n"); + } while (Changes); - // No need to search the children, we found a dominating value. - IDFI.skipChildren(); - } + assert(IdxVNI && "Didn't find value for Idx"); - // The search should at least find a dominating value for IdxMBB. - assert(!DomValue.empty() && "Couldn't find a reaching definition"); +#ifndef NDEBUG + // Check the LiveOutCache invariants. 
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); + I != E; ++I) { + assert(I->first && "Null MBB entry in cache"); + assert(I->second.first && "Null VNInfo in cache"); + assert(I->second.second && "Null DomTreeNode in cache"); + if (I->second.second->getBlock() == I->first) + continue; + for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), + PE = I->first->pred_end(); PI != PE; ++PI) + assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant"); + } +#endif - // Since we went through the trouble of a full DFS visiting all reaching defs, - // the values in DomValue are now accurate. No more phi-defs are needed for - // these blocks, so we can color the live ranges. + // Since we went through the trouble of a full BFS visiting all reaching defs, + // the values in LiveIn are now accurate. No more phi-defs are needed + // for these blocks, so we can color the live ranges. // This makes the next mapValue call much faster. - VNInfo *IdxVNI = 0; - for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E; - ++I) { - MachineBasicBlock *MBB = I->first; - VNInfo *VNI = I->second; - SlotIndex Start = lis_.getMBBStartIdx(MBB); - if (MBB == IdxMBB) { - // Don't add full liveness to IdxMBB, stop at Idx. - if (Start != Idx) - li_.addRange(LiveRange(Start, Idx, VNI)); - // The caller had better add some liveness to IdxVNI, or it leaks. - IdxVNI = VNI; - } else - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI)); + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LiveOutCache.lookup(MBB).first; + + // Anything in LiveIn other than IdxMBB is live-through. + // In IdxMBB, we should stop at Idx unless the same value is live-out. + if (MBB == IdxMBB && IdxVNI != VNI) + LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); + else + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); } - assert(IdxVNI && "Didn't find value for Idx"); return IdxVNI; } -// extendTo - Find the last li_ value defined in MBB at or before Idx. The -// parentli_ is assumed to be live at Idx. Extend the live range to Idx. +#ifndef NDEBUG +void LiveIntervalMap::dumpCache() { + for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); + I != E; ++I) { + assert(I->first && "Null MBB entry in cache"); + assert(I->second.first && "Null VNInfo in cache"); + assert(I->second.second && "Null DomTreeNode in cache"); + dbgs() << " cache: BB#" << I->first->getNumber() + << " has valno #" << I->second.first->id << " from BB#" + << I->second.second->getBlock()->getNumber() << ", preds"; + for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), + PE = I->first->pred_end(); PI != PE; ++PI) + dbgs() << " BB#" << (*PI)->getNumber(); + dbgs() << '\n'; + } + dbgs() << " cache: " << LiveOutCache.size() << " entries.\n"; +} +#endif + +// extendTo - Find the last LI value defined in MBB at or before Idx. The +// ParentLI is assumed to be live at Idx. Extend the live range to Idx. // Return the found VNInfo, or NULL. 
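Before the extendTo diff below, here is a standalone model of the lookup it performs: use std::upper_bound to find the last segment starting at or before the query index, reject it if it ended before the block began, and otherwise grow its end so it covers the query point. Segment, the comparison operator, and the index arithmetic are simplified stand-ins for the LiveInterval/SlotIndex versions:

#include <algorithm>
#include <vector>

struct Segment { unsigned start, end; int valno; };   // half-open [start, end)

// Needed by std::upper_bound: "index < segment" compares against the start.
inline bool operator<(unsigned Idx, const Segment &S) { return Idx < S.start; }

// Returns the value number reaching Idx, or -1 if none; extends the segment.
int extendSegmentTo(std::vector<Segment> &Live, unsigned BlockStart,
                    unsigned Idx) {
  auto I = std::upper_bound(Live.begin(), Live.end(), Idx);
  if (I == Live.begin())
    return -1;                 // no segment starts at or before Idx
  --I;
  if (I->end <= BlockStart)
    return -1;                 // segment died before this block
  if (I->end <= Idx)
    I->end = Idx + 1;          // extend liveness up to and including Idx
  return I->valno;
}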
-VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) { - LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx); - if (I == li_.begin()) +VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) { + assert(LI && "call reset first"); + LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx); + if (I == LI->begin()) return 0; --I; - if (I->start < lis_.getMBBStartIdx(MBB)) + if (I->end <= LIS.getMBBStartIdx(MBB)) return 0; - if (I->end < Idx) - I->end = Idx; + if (I->end <= Idx) + I->end = Idx.getNextSlot(); return I->valno; } -// addSimpleRange - Add a simple range from parentli_ to li_. +// addSimpleRange - Add a simple range from ParentLI to LI. // ParentVNI must be live in the [Start;End) interval. void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI) { - VNInfo *VNI = mapValue(ParentVNI, Start); - // A simple mappoing is easy. - if (VNI->def == ParentVNI->def) { - li_.addRange(LiveRange(Start, End, VNI)); + assert(LI && "call reset first"); + bool simple; + VNInfo *VNI = mapValue(ParentVNI, Start, &simple); + // A simple mapping is easy. + if (simple) { + LI->addRange(LiveRange(Start, End, VNI)); return; } // ParentVNI is a complex value. We must map per MBB. - MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start); - MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot()); if (MBB == MBBE) { - li_.addRange(LiveRange(Start, End, VNI)); + LI->addRange(LiveRange(Start, End, VNI)); return; } // First block. - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI)); + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); // Run sequence of full blocks. for (++MBB; MBB != MBBE; ++MBB) { - Start = lis_.getMBBStartIdx(MBB); - li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), - mapValue(ParentVNI, Start))); + Start = LIS.getMBBStartIdx(MBB); + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), + mapValue(ParentVNI, Start))); } // Final block. - Start = lis_.getMBBStartIdx(MBB); + Start = LIS.getMBBStartIdx(MBB); if (Start != End) - li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); + LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); } -/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. +/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. /// All needed values whose def is not inside [Start;End) must be defined /// beforehand so mapValue will work. void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { - LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end(); + assert(LI && "call reset first"); + LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end(); LiveInterval::const_iterator I = std::lower_bound(B, E, Start); // Check if --I begins before Start and overlaps. @@ -575,403 +539,374 @@ void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { addSimpleRange(I->start, std::min(End, I->end), I->valno); } + //===----------------------------------------------------------------------===// // Split Editor //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - SmallVectorImpl<LiveInterval*> &intervals) - : sa_(sa), lis_(lis), vrm_(vrm), - mri_(vrm.getMachineFunction().getRegInfo()), - tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()), - curli_(sa_.getCurLI()), - dupli_(0), openli_(0), - intervals_(intervals), - firstInterval(intervals_.size()) +SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt, + LiveRangeEdit &edit) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(edit), + OpenIdx(0), + RegAssign(Allocator) { - assert(curli_ && "SplitEditor created from empty SplitAnalysis"); - - // Make sure curli_ is assigned a stack slot, so all our intervals get the - // same slot as curli_. - if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT) - vrm_.assignVirt2StackSlot(curli_->reg); - + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit.anyRematerializable(LIS, TII, 0); } -LiveInterval *SplitEditor::createInterval() { - unsigned curli = sa_.getCurLI()->reg; - unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli)); - LiveInterval &Intv = lis_.getOrCreateInterval(Reg); - vrm_.grow(); - vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli)); - return &Intv; +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; } -LiveInterval *SplitEditor::getDupLI() { - if (!dupli_) { - // Create an interval for dupli that is a copy of curli. - dupli_ = createInterval(); - dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator()); +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *CopyMI = 0; + SlotIndex Def; + LiveInterval *LI = Edit.get(RegIdx); + + // Attempt cheap-as-a-copy rematerialization. + LiveRangeEdit::Remat RM(ParentVNI); + if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + } else { + // Can't remat, just insert a copy from parent. + CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) + .addReg(Edit.getReg()); + Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); } - return dupli_; -} -VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) { - VNInfo *&VNI = valueMap_[curliVNI]; - if (!VNI) - VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator()); - return VNI; -} + // Define the value in Reg. + VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def); + VNI->setCopy(CopyMI); -/// Insert a COPY instruction curli -> li. Allocate a new value from li -/// defined by the COPY. Note that rewrite() will deal with the curli -/// register, so this function can be used to copy from any interval - openli, -/// curli, or dupli. 
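The new defFromParent above replaces the plain copy insertion that is removed below: before falling back to a COPY from the parent register, it asks whether the value can be rematerialized cheaply at the point where the new interval needs a definition. A standalone sketch of that decision, with hypothetical stand-in types rather than the LiveRangeEdit API:

#include <string>

struct ParentDef {
  std::string instr;        // textual stand-in for the defining instruction
  bool cheapRemat = false;  // true for e.g. constant materializations
};

// Returns the instruction text to emit where the new interval needs a def.
std::string defineFromParent(const ParentDef &Def, const std::string &NewReg,
                             const std::string &ParentReg) {
  if (Def.cheapRemat)
    return Def.instr + " -> " + NewReg;        // re-execute the original def
  return "COPY " + NewReg + ", " + ParentReg;  // otherwise copy the live value
}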
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { - MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY), - LI.reg).addReg(curli_->reg); - SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator()); + // Add minimal liveness for the new value. + Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + return VNI; } /// Create a new virtual register and live interval. void SplitEditor::openIntv() { - assert(!openli_ && "Previous LI not closed before openIntv"); - openli_ = createInterval(); - intervals_.push_back(openli_); - liveThrough_ = false; -} + assert(!OpenIdx && "Previous LI not closed before openIntv"); -/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is -/// not live before Idx, a COPY is not inserted. -void SplitEditor::enterIntvBefore(SlotIndex Idx) { - assert(openli_ && "openIntv not called before enterIntvBefore"); - - // Copy from curli_ if it is live. - if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) { - MachineInstr *MI = lis_.getInstructionFromIndex(Idx); - assert(MI && "enterIntvBefore called with invalid index"); - VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI); - openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI)); - - // Make sure CurVNI is properly mapped. - VNInfo *&mapVNI = valueMap_[CurVNI]; - // We dont have SSA update yet, so only one entry per value is allowed. - assert(!mapVNI && "enterIntvBefore called more than once for the same value"); - mapVNI = VNI; + // Create the complement as index 0. + if (Edit.empty()) { + Edit.create(MRI, LIS, VRM); + LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); + LIMappers.back().reset(Edit.get(0)); } - DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n'); -} -/// enterIntvAtEnd - Enter openli at the end of MBB. -/// PhiMBB is a successor inside openli where a PHI value is created. -/// Currently, all entries must share the same PhiMBB. -void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) { - assert(openli_ && "openIntv not called before enterIntvAtEnd"); - - SlotIndex EndA = lis_.getMBBEndIdx(&A); - VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex()); - if (!CurVNIA) { - DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#" - << A.getNumber() << ".\n"); - return; - } + // Create the open interval. + OpenIdx = Edit.size(); + Edit.create(MRI, LIS, VRM); + LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); + LIMappers[OpenIdx].reset(Edit.get(OpenIdx)); +} - // Add a phi kill value and live range out of A. - VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator()); - openli_->addRange(LiveRange(VNIA->def, EndA, VNIA)); - - // FIXME: If this is the only entry edge, we don't need the extra PHI value. - // FIXME: If there are multiple entry blocks (so not a loop), we need proper - // SSA update. - - // Now look at the start of B. 
- SlotIndex StartB = lis_.getMBBStartIdx(&B); - SlotIndex EndB = lis_.getMBBEndIdx(&B); - const LiveRange *CurB = curli_->getLiveRangeContaining(StartB); - if (!CurB) { - DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#" - << B.getNumber() << ".\n"); - return; +SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvBefore"); + DEBUG(dbgs() << " enterIntvBefore " << Idx); + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); - VNInfo *VNIB = openli_->getVNInfoAt(StartB); - if (!VNIB) { - // Create a phi value. - VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false, - lis_.getVNInfoAllocator()); - VNIB->setIsPHIDef(true); - VNInfo *&mapVNI = valueMap_[CurB->valno]; - if (mapVNI) { - // Multiple copies - must create PHI value. - abort(); - } else { - // This is the first copy of dupLR. Mark the mapping. - mapVNI = VNIB; - } + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} +SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); + SlotIndex End = LIS.getMBBEndIdx(&MBB); + SlotIndex Last = End.getPrevSlot(); + DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return End; } - - DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n'); + DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, + LIS.getLastSplitPoint(Edit.getParent(), &MBB)); + RegAssign.insert(VNI->def, End, OpenIdx); + DEBUG(dump()); + return VNI->def; } -/// useIntv - indicate that all instructions in MBB should use openli. +/// useIntv - indicate that all instructions in MBB should use OpenLI. void SplitEditor::useIntv(const MachineBasicBlock &MBB) { - useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB)); + useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB)); } void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { - assert(openli_ && "openIntv not called before useIntv"); + assert(OpenIdx && "openIntv not called before useIntv"); + DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} - // Map the curli values from the interval into openli_ - LiveInterval::const_iterator B = curli_->begin(), E = curli_->end(); - LiveInterval::const_iterator I = std::lower_bound(B, E, Start); +SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvAfter"); + DEBUG(dbgs() << " leaveIntvAfter " << Idx); - if (I != B) { - --I; - // I begins before Start, but overlaps. - if (I->end > Start) - openli_->addRange(LiveRange(Start, std::min(End, I->end), - mapValue(I->valno))); - ++I; + // The interval must be live beyond the instruction at Idx. + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); - // The remaining ranges begin after Start. 
- for (;I != E && I->start < End; ++I) - openli_->addRange(LiveRange(I->start, std::min(End, I->end), - mapValue(I->valno))); - DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_ - << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; } -/// leaveIntvAfter - Leave openli after the instruction at Idx. -void SplitEditor::leaveIntvAfter(SlotIndex Idx) { - assert(openli_ && "openIntv not called before leaveIntvAfter"); +SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvBefore"); + DEBUG(dbgs() << " leaveIntvBefore " << Idx); - const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex()); - if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) { - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n"); - return; + // The interval must be live into the instruction at Idx. + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); - // Was this value of curli live through openli? - if (!openli_->liveAt(CurLR->valno->def)) { - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n"); - liveThrough_ = true; - return; - } - - // We are going to insert a back copy, so we must have a dupli_. - LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex()); - assert(DupLR && "dupli not live into black, but curli is?"); - - // Insert the COPY instruction. - MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx); - MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(), - tii_.get(TargetOpcode::COPY), dupli_->reg) - .addReg(openli_->reg); - SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx, - mapValue(CurLR->valno))); - DupLR->valno->def = CopyIdx; - DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; } -/// leaveIntvAtTop - Leave the interval at the top of MBB. -/// Currently, only one value can leave the interval. -void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { - assert(openli_ && "openIntv not called before leaveIntvAtTop"); - - SlotIndex Start = lis_.getMBBStartIdx(&MBB); - const LiveRange *CurLR = curli_->getLiveRangeContaining(Start); - - // Is curli even live-in to MBB? - if (!CurLR) { - DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n"); - return; - } - - // Is curli defined by PHI at the beginning of MBB? - bool isPHIDef = CurLR->valno->isPHIDef() && - CurLR->valno->def.getBaseIndex() == Start; +SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); - // If MBB is using a value of curli that was defined outside the openli range, - // we don't want to copy it back here. 
- if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) { - DEBUG(dbgs() << " leaveIntvAtTop at " << Start - << ": using external value\n"); - liveThrough_ = true; - return; + VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Start; } - // We are going to insert a back copy, so we must have a dupli_. - LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start); - assert(DupLR && "dupli not live into black, but curli is?"); - - // Insert the COPY instruction. - MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(), - tii_.get(TargetOpcode::COPY), dupli_->reg) - .addReg(openli_->reg); - SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - - // Adjust dupli and openli values. - if (isPHIDef) { - // dupli was already a PHI on entry to MBB. Simply insert an openli PHI, - // and shift the dupli def down to the COPY. - VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, - lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - openli_->addRange(LiveRange(VNI->def, Idx, VNI)); - - dupli_->removeRange(Start, Idx); - DupLR->valno->def = Idx; - DupLR->valno->setIsPHIDef(false); - } else { - // The dupli value was defined somewhere inside the openli range. - DEBUG(dbgs() << " leaveIntvAtTop source value defined at " - << DupLR->valno->def << "\n"); - // FIXME: We may not need a PHI here if all predecessors have the same - // value. - VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false, - lis_.getVNInfoAllocator()); - VNI->setIsPHIDef(true); - openli_->addRange(LiveRange(VNI->def, Idx, VNI)); - - // FIXME: What if DupLR->valno is used by multiple exits? SSA Update. - - // closeIntv is going to remove the superfluous live ranges. - DupLR->valno->def = Idx; - DupLR->valno->setIsPHIDef(false); - } + VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, + MBB.SkipPHIsAndLabels(MBB.begin())); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} - DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n'); +void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before overlapIntv"); + assert(Edit.getParent().getVNInfoAt(Start) == + Edit.getParent().getVNInfoAt(End.getPrevSlot()) && + "Parent changes value in extended range"); + assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*"); + assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && + "Range cannot span basic blocks"); + + // Treat this as useIntv() for now. The complement interval will be extended + // as needed by mapValue(). + DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); } /// closeIntv - Indicate that we are done editing the currently open /// LiveInterval, and ranges can be trimmed. void SplitEditor::closeIntv() { - assert(openli_ && "openIntv not called before closeIntv"); - - DEBUG(dbgs() << " closeIntv cleaning up\n"); - DEBUG(dbgs() << " open " << *openli_ << '\n'); - - if (liveThrough_) { - DEBUG(dbgs() << " value live through region, leaving dupli as is.\n"); - } else { - // live out with copies inserted, or killed by region. Either way we need to - // remove the overlapping region from dupli. 
- getDupLI(); - for (LiveInterval::iterator I = openli_->begin(), E = openli_->end(); - I != E; ++I) { - dupli_->removeRange(I->start, I->end); - } - // FIXME: A block branching to the entry block may also branch elsewhere - // curli is live. We need both openli and curli to be live in that case. - DEBUG(dbgs() << " dup2 " << *dupli_ << '\n'); - } - openli_ = 0; - valueMap_.clear(); + assert(OpenIdx && "openIntv not called before closeIntv"); + OpenIdx = 0; } -/// rewrite - after all the new live ranges have been created, rewrite -/// instructions using curli to use the new intervals. -void SplitEditor::rewrite() { - assert(!openli_ && "Previous LI not closed before rewrite"); - const LiveInterval *curli = sa_.getCurLI(); - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg), - RE = mri_.reg_end(); RI != RE;) { +/// rewriteAssigned - Rewrite all uses of Edit.getReg(). +void SplitEditor::rewriteAssigned() { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()), + RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; + // LiveDebugVariables should have handled all DBG_VALUE instructions. if (MI->isDebugValue()) { DEBUG(dbgs() << "Zapping " << *MI); - // FIXME: We can do much better with debug values. MO.setReg(0); continue; } - SlotIndex Idx = lis_.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); - LiveInterval *LI = dupli_; - for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { - LiveInterval *testli = intervals_[i]; - if (testli->liveAt(Idx)) { - LI = testli; - break; - } - } - if (LI) { - MO.setReg(LI->reg); - sa_.removeUse(MI); - DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI); - } - } - // dupli_ goes in last, after rewriting. - if (dupli_) { - if (dupli_->empty()) { - DEBUG(dbgs() << " dupli became empty?\n"); - lis_.removeInterval(dupli_->reg); - dupli_ = 0; - } else { - dupli_->RenumberValues(lis_); - intervals_.push_back(dupli_); + // <undef> operands don't really read the register, so just assign them to + // the complement. + if (MO.isUse() && MO.isUndef()) { + MO.setReg(Edit.get(0)->reg); + continue; } + + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + + // Rewrite to the mapped register at Idx. + unsigned RegIdx = RegAssign.lookup(Idx); + MO.setReg(Edit.get(RegIdx)->reg); + DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + << Idx << ':' << RegIdx << '\t' << *MI); + + // Extend liveness to Idx. + const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + LIMappers[RegIdx].mapValue(ParentVNI, Idx); } +} - // Calculate spill weight and allocation hints for new intervals. - VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_); - for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) { - LiveInterval &li = *intervals_[i]; - vrai.CalculateRegClass(li.reg); - vrai.CalculateWeightAndHint(li); - DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName() - << ":" << li << '\n'); +/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping. 
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, + const ConnectedVNInfoEqClasses &ConEq) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MO.isUse() && MO.isUndef()) + continue; + // DBG_VALUE instructions should have been eliminated earlier. + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + << Idx << ':'); + const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx); + assert(VNI && "Interval not live at use."); + MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg); + DEBUG(dbgs() << VNI->id << '\t' << *MI); } } +void SplitEditor::finish() { + assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); -//===----------------------------------------------------------------------===// -// Loop Splitting -//===----------------------------------------------------------------------===// + // At this point, the live intervals in Edit contain VNInfos corresponding to + // the inserted copies. -bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { - SplitAnalysis::LoopBlocks Blocks; - sa_.getLoopBlocks(Loop, Blocks); + // Add the original defs from the parent interval. + for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), + E = Edit.getParent().vni_end(); I != E; ++I) { + const VNInfo *ParentVNI = *I; + if (ParentVNI->isUnused()) + continue; + LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)]; + VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def); + LIM.getLI()->addRange(LiveRange(ParentVNI->def, + ParentVNI->def.getNextSlot(), VNI)); + // Mark all values as complex to force liveness computation. + // This should really only be necessary for remat victims, but we are lazy. + LIM.markComplexMapped(ParentVNI); + } - // Break critical edges as needed. - SplitAnalysis::BlockPtrSet CriticalExits; - sa_.getCriticalExits(Blocks, CriticalExits); - assert(CriticalExits.empty() && "Cannot break critical exits yet"); +#ifndef NDEBUG + // Every new interval must have a def by now, otherwise the split is bogus. + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); +#endif + + // FIXME: Don't recompute the liveness of all values, infer it from the + // overlaps between the parent live interval and RegAssign. + // The mapValue algorithm is only necessary when: + // - The parent value maps to multiple defs, and new phis are needed, or + // - The value has been rematerialized before some uses, and we want to + // minimize the live range so it only reaches the remaining uses. + // All other values have simple liveness that can be computed from RegAssign + // and the parent live interval. + + // Extend live ranges to be live-out for successor PHI values. 
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), + E = Edit.getParent().vni_end(); I != E; ++I) { + const VNInfo *PHIVNI = *I; + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + LiveIntervalMap &LIM = LIMappers[RegIdx]; + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def + << " -> " << RegIdx << '\n'); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) { + DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n"); + assert(RegAssign.lookup(End) == RegIdx && + "Different register assignment in phi predecessor"); + LIM.mapValue(VNI, End); + } + else + DEBUG(dbgs() << " is not live-out\n"); + } + DEBUG(dbgs() << " " << *LIM.getLI() << '\n'); + } - // Create new live interval for the loop. - openIntv(); + // Rewrite instructions. + rewriteAssigned(); - // Insert copies in the predecessors. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(), - E = Blocks.Preds.end(); I != E; ++I) { - MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); - enterIntvAtEnd(MBB, *Loop->getHeader()); - } + // FIXME: Delete defs that were rematted everywhere. - // Switch all loop blocks. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(), - E = Blocks.Loop.end(); I != E; ++I) - useIntv(**I); + // Get rid of unused values and set phi-kill flags. + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + (*I)->RenumberValues(LIS); - // Insert back copies in the exit blocks. - for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(), - E = Blocks.Exits.end(); I != E; ++I) { - MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I); - leaveIntvAtTop(MBB); + // Now check if any registers were separated into multiple components. + ConnectedVNInfoEqClasses ConEQ(LIS); + for (unsigned i = 0, e = Edit.size(); i != e; ++i) { + // Don't use iterators, they are invalidated by create() below. + LiveInterval *li = Edit.get(i); + unsigned NumComp = ConEQ.Classify(li); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); + SmallVector<LiveInterval*, 8> dups; + dups.push_back(li); + for (unsigned i = 1; i != NumComp; ++i) + dups.push_back(&Edit.create(MRI, LIS, VRM)); + rewriteComponents(dups, ConEQ); + ConEQ.Distribute(&dups[0]); } - // Done. - closeIntv(); - rewrite(); - return dupli_; + // Calculate spill weight and allocation hints for new intervals. + VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops); + for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){ + LiveInterval &li = **I; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } } @@ -979,45 +914,50 @@ bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) { // Single Block Splitting //===----------------------------------------------------------------------===// -/// splitSingleBlocks - Split curli into a separate live interval inside each -/// basic block in Blocks. 
Return true if curli has been completely replaced,
-/// false if curli is still intact, and needs to be spilled or split further.
-bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
-  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
-  // Determine the first and last instruction using curli in each block.
-  typedef std::pair<SlotIndex,SlotIndex> IndexPair;
-  typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
-  IndexPairMap MBBRange;
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I) {
-    const MachineBasicBlock *MBB = (*I)->getParent();
-    if (!Blocks.count(MBB))
+/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
+/// may be an advantage to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+  // If CurLI is local to one block, there is no point to splitting it.
+  if (LiveBlocks.size() <= 1)
+    return false;
+  // Add blocks with multiple uses.
+  for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
+    const BlockInfo &BI = LiveBlocks[i];
+    if (!BI.Uses)
       continue;
-    SlotIndex Idx = lis_.getInstructionIndex(*I);
-    DEBUG(dbgs() << "  BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
-    IndexPair &IP = MBBRange[MBB];
-    if (!IP.first.isValid() || Idx < IP.first)
-      IP.first = Idx;
-    if (!IP.second.isValid() || Idx > IP.second)
-      IP.second = Idx;
+    unsigned Instrs = UsingBlocks.lookup(BI.MBB);
+    if (Instrs <= 1)
+      continue;
+    if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+      continue;
+    Blocks.insert(BI.MBB);
   }
+  return !Blocks.empty();
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+  DEBUG(dbgs() << "  splitSingleBlocks for " << Blocks.size() << " blocks.\n");
 
-  // Create a new interval for each block.
-  for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
-       E = Blocks.end(); I != E; ++I) {
-    IndexPair &IP = MBBRange[*I];
-    DEBUG(dbgs() << "  splitting for BB#" << (*I)->getNumber() << ": ["
-                 << IP.first << ';' << IP.second << ")\n");
-    assert(IP.first.isValid() && IP.second.isValid());
+  for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+    const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+    if (!BI.Uses || !Blocks.count(BI.MBB))
+      continue;
     openIntv();
-    enterIntvBefore(IP.first);
-    useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
-    leaveIntvAfter(IP.second);
+    SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
+    if (BI.LastUse < BI.LastSplitPoint) {
+      useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+    } else {
+      // The last use is after the last valid split point.
+      SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+      useIntv(SegStart, SegStop);
+      overlapIntv(SegStop, BI.LastUse);
+    }
     closeIntv();
   }
-  rewrite();
-  return dupli_;
+  finish();
 }
 
@@ -1025,31 +965,29 @@ bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
 // Sub Block Splitting
 //===----------------------------------------------------------------------===//
 
-/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
 /// and it would pay to subdivide the interval inside that block, return it.
 /// Otherwise return NULL. The returned block can be passed to
 /// SplitEditor::splitInsideBlock.
 const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
   // The interval must be exclusive to one block.
-  if (usingBlocks_.size() != 1)
+  if (UsingBlocks.size() != 1)
     return 0;
   // Don't do this for less than 4 instructions. We want to be sure that
   // splitting actually reduces the instruction count per interval.
-  if (usingInstrs_.size() < 4)
+  if (UsingInstrs.size() < 4)
     return 0;
-  return usingBlocks_.begin()->first;
+  return UsingBlocks.begin()->first;
 }
 
-/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-/// true if curli has been completely replaced, false if curli is still
-/// intact, and needs to be spilled or split further.
-bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   SmallVector<SlotIndex, 32> Uses;
-  Uses.reserve(sa_.usingInstrs_.size());
-  for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
-       E = sa_.usingInstrs_.end(); I != E; ++I)
+  Uses.reserve(SA.UsingInstrs.size());
+  for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
+       E = SA.UsingInstrs.end(); I != E; ++I)
     if ((*I)->getParent() == MBB)
-      Uses.push_back(lis_.getInstructionIndex(*I));
+      Uses.push_back(LIS.getInstructionIndex(*I));
   DEBUG(dbgs() << "  splitInsideBlock BB#" << MBB->getNumber() << " for "
                << Uses.size() << " instructions.\n");
   assert(Uses.size() >= 3 && "Need at least 3 instructions");
@@ -1077,21 +1015,16 @@ bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
   // First interval before the gap. Don't create single-instr intervals.
   if (bestPos > 1) {
     openIntv();
-    enterIntvBefore(Uses.front());
-    useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
-    leaveIntvAfter(Uses[bestPos-1]);
+    useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
     closeIntv();
   }
 
   // Second interval after the gap.
if (bestPos < Uses.size()-1) { openIntv(); - enterIntvBefore(Uses[bestPos]); - useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex()); - leaveIntvAfter(Uses.back()); + useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back())); closeIntv(); } - rewrite(); - return dupli_; + finish(); } diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index ddef7461dc3d..5c34afd1c819 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -1,4 +1,4 @@ -//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,125 +12,132 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/SlotIndexes.h" namespace llvm { +class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; +class LiveRangeEdit; class MachineInstr; -class MachineLoop; class MachineLoopInfo; class MachineRegisterInfo; class TargetInstrInfo; +class TargetRegisterInfo; class VirtRegMap; class VNInfo; +class raw_ostream; + +/// At some point we should just include MachineDominators.h: +class MachineDominatorTree; +template <class NodeT> class DomTreeNodeBase; +typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; + /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting /// opportunities. class SplitAnalysis { public: - const MachineFunction &mf_; - const LiveIntervals &lis_; - const MachineLoopInfo &loops_; - const TargetInstrInfo &tii_; + const MachineFunction &MF; + const VirtRegMap &VRM; + const LiveIntervals &LIS; + const MachineLoopInfo &Loops; + const TargetInstrInfo &TII; // Instructions using the the current register. typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet; - InstrPtrSet usingInstrs_; + InstrPtrSet UsingInstrs; + + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; - // The number of instructions using curli in each basic block. + // The number of instructions using CurLI in each basic block. typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap; - BlockCountMap usingBlocks_; + BlockCountMap UsingBlocks; + + /// Additional information about basic blocks where the current variable is + /// live. Such a block will look like one of these templates: + /// + /// 1. | o---x | Internal to block. Variable is only live in this block. + /// 2. |---x | Live-in, kill. + /// 3. | o---| Def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. + /// 5. |---o---o---| Live-through with uses or defs. + /// 6. |-----------| Live-through without uses. Transparent. + /// + struct BlockInfo { + MachineBasicBlock *MBB; + SlotIndex FirstUse; ///< First instr using current reg. + SlotIndex LastUse; ///< Last instr using current reg. + SlotIndex Kill; ///< Interval end point inside block. + SlotIndex Def; ///< Interval start point inside block. + /// Last possible point for splitting live ranges. + SlotIndex LastSplitPoint; + bool Uses; ///< Current reg has uses or defs in block. + bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). + bool LiveIn; ///< Current reg is live in. + bool LiveOut; ///< Current reg is live out. + + // Per-interference pattern scratch data. + bool OverlapEntry; ///< Interference overlaps entering interval. 
+ bool OverlapExit; ///< Interference overlaps exiting interval. + }; - // The number of basic block using curli in each loop. - typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap; - LoopCountMap usingLoops_; + /// Basic blocks where var is live. This array is parallel to + /// SpillConstraints. + SmallVector<BlockInfo, 8> LiveBlocks; private: // Current live interval. - const LiveInterval *curli_; + const LiveInterval *CurLI; - // Sumarize statistics by counting instructions using curli_. + // Sumarize statistics by counting instructions using CurLI. void analyzeUses(); + /// calcLiveBlockInfo - Compute per-block information about CurLI. + void calcLiveBlockInfo(); + /// canAnalyzeBranch - Return true if MBB ends in a branch that can be /// analyzed. bool canAnalyzeBranch(const MachineBasicBlock *MBB); public: - SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis, + SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli); - /// analyze - set curli to the specified interval, and analyze how it may be + /// analyze - set CurLI to the specified interval, and analyze how it may be /// split. void analyze(const LiveInterval *li); - /// removeUse - Update statistics by noting that mi no longer uses curli. - void removeUse(const MachineInstr *mi); - - const LiveInterval *getCurLI() { return curli_; } - /// clear - clear all data structures so SplitAnalysis is ready to analyze a /// new interval. void clear(); - typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet; - - // Sets of basic blocks surrounding a machine loop. - struct LoopBlocks { - BlockPtrSet Loop; // Blocks in the loop. - BlockPtrSet Preds; // Loop predecessor blocks. - BlockPtrSet Exits; // Loop exit blocks. - - void clear() { - Loop.clear(); - Preds.clear(); - Exits.clear(); - } - }; - - // Calculate the block sets surrounding the loop. - void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks); - - /// LoopPeripheralUse - how is a variable used in and around a loop? - /// Peripheral blocks are the loop predecessors and exit blocks. - enum LoopPeripheralUse { - ContainedInLoop, // All uses are inside the loop. - SinglePeripheral, // At most one instruction per peripheral block. - MultiPeripheral, // Multiple instructions in some peripheral blocks. - OutsideLoop // Uses outside loop periphery. - }; - - /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in - /// and around the Loop. - LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&); + /// getParent - Return the last analyzed interval. + const LiveInterval &getParent() const { return *CurLI; } - /// getCriticalExits - It may be necessary to partially break critical edges - /// leaving the loop if an exit block has phi uses of curli. Collect the exit - /// blocks that need special treatment into CriticalExits. - void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits); + /// hasUses - Return true if MBB has any uses of CurLI. + bool hasUses(const MachineBasicBlock *MBB) const { + return UsingBlocks.lookup(MBB); + } - /// canSplitCriticalExits - Return true if it is possible to insert new exit - /// blocks before the blocks in CriticalExits. - bool canSplitCriticalExits(const LoopBlocks &Blocks, - BlockPtrSet &CriticalExits); + typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; - /// getBestSplitLoop - Return the loop where curli may best be split to a - /// separate register, or NULL. 
- const MachineLoop *getBestSplitLoop(); + // Print a set of blocks with use counts. + void print(const BlockPtrSet&, raw_ostream&) const; /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from - /// having curli split to a new live interval. Return true if Blocks can be + /// having CurLI split to a new live interval. Return true if Blocks can be /// passed to SplitEditor::splitSingleBlocks. bool getMultiUseBlocks(BlockPtrSet &Blocks); - /// getBlockForInsideSplit - If curli is contained inside a single basic block, - /// and it wou pay to subdivide the interval inside that block, return it. - /// Otherwise return NULL. The returned block can be passed to + /// getBlockForInsideSplit - If CurLI is contained inside a single basic + /// block, and it would pay to subdivide the interval inside that block, + /// return it. Otherwise return NULL. The returned block can be passed to /// SplitEditor::splitInsideBlock. const MachineBasicBlock *getBlockForInsideSplit(); }; @@ -140,58 +147,102 @@ public: /// interval that is a subset. Insert phi-def values as needed. This class is /// used by SplitEditor to create new smaller LiveIntervals. /// -/// parentli_ is the larger interval, li_ is the subset interval. Every value -/// in li_ corresponds to exactly one value in parentli_, and the live range -/// of the value is contained within the live range of the parentli_ value. -/// Values in parentli_ may map to any number of openli_ values, including 0. +/// ParentLI is the larger interval, LI is the subset interval. Every value +/// in LI corresponds to exactly one value in ParentLI, and the live range +/// of the value is contained within the live range of the ParentLI value. +/// Values in ParentLI may map to any number of OpenLI values, including 0. class LiveIntervalMap { - LiveIntervals &lis_; + LiveIntervals &LIS; + MachineDominatorTree &MDT; // The parent interval is never changed. - const LiveInterval &parentli_; + const LiveInterval &ParentLI; - // The child interval's values are fully contained inside parentli_ values. - LiveInterval &li_; + // The child interval's values are fully contained inside ParentLI values. + LiveInterval *LI; typedef DenseMap<const VNInfo*, VNInfo*> ValueMap; - // Map parentli_ values to simple values in li_ that are defined at the same - // SlotIndex, or NULL for parentli_ values that have complex li_ defs. + // Map ParentLI values to simple values in LI that are defined at the same + // SlotIndex, or NULL for ParentLI values that have complex LI defs. // Note there is a difference between values mapping to NULL (complex), and // values not present (unknown/unmapped). - ValueMap valueMap_; - - // extendTo - Find the last li_ value defined in MBB at or before Idx. The - // parentli_ is assumed to be live at Idx. Extend the live range to Idx. - // Return the found VNInfo, or NULL. - VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx); - - // addSimpleRange - Add a simple range from parentli_ to li_. - // ParentVNI must be live in the [Start;End) interval. - void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); + ValueMap Values; + + typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap; + + // LiveOutCache - Map each basic block where LI is live out to the live-out + // value and its defining block. One of these conditions shall be true: + // + // 1. !LiveOutCache.count(MBB) + // 2. LiveOutCache[MBB].second.getNode() == MBB + // 3. 
forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] + // + // This is only a cache, the values can be computed as: + // + // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB)) + // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] + // + // The cache is also used as a visiteed set by mapValue(). + LiveOutMap LiveOutCache; + + // Dump the live-out cache to dbgs(). + void dumpCache(); public: LiveIntervalMap(LiveIntervals &lis, - const LiveInterval &parentli, - LiveInterval &li) - : lis_(lis), parentli_(parentli), li_(li) {} + MachineDominatorTree &mdt, + const LiveInterval &parentli) + : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {} + + /// reset - clear all data structures and start a new live interval. + void reset(LiveInterval *); + + /// getLI - return the current live interval. + LiveInterval *getLI() const { return LI; } - /// defValue - define a value in li_ from the parentli_ value VNI and Idx. + /// defValue - define a value in LI from the ParentLI value VNI and Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within - /// ParentVNI's live range in parentli_. - /// Return the new li_ value. + /// ParentVNI's live range in ParentLI. + /// Return the new LI value. VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx); - /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is + /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is /// assumed that ParentVNI is live at Idx. /// If ParentVNI has not been defined by defValue, it is assumed that /// ParentVNI->def dominates Idx. /// If ParentVNI has been defined by defValue one or more times, a value that /// dominates Idx will be returned. This may require creating extra phi-def - /// values and adding live ranges to li_. - VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx); + /// values and adding live ranges to LI. + /// If simple is not NULL, *simple will indicate if ParentVNI is a simply + /// mapped value. + VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0); + + // extendTo - Find the last LI value defined in MBB at or before Idx. The + // parentli is assumed to be live at Idx. Extend the live range to include + // Idx. Return the found VNInfo, or NULL. + VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx); + + /// isMapped - Return true is ParentVNI is a known mapped value. It may be a + /// simple 1-1 mapping or a complex mapping to later defs. + bool isMapped(const VNInfo *ParentVNI) const { + return Values.count(ParentVNI); + } + + /// isComplexMapped - Return true if ParentVNI has received new definitions + /// with defValue. + bool isComplexMapped(const VNInfo *ParentVNI) const; + + /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the + /// number of definitions. + void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; } + + // addSimpleRange - Add a simple range from ParentLI to LI. + // ParentVNI must be live in the [Start;End) interval. + void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); - /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_. + /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. /// All needed values whose def is not inside [Start;End) must be defined /// beforehand so mapValue will work. 
void addRange(SlotIndex Start, SlotIndex End); @@ -207,115 +258,129 @@ public: /// - Mark the ranges where the new interval is used with useIntv* /// - Mark the places where the interval is exited with exitIntv*. /// - Finish the current interval with closeIntv and repeat from 2. -/// - Rewrite instructions with rewrite(). +/// - Rewrite instructions with finish(). /// class SplitEditor { - SplitAnalysis &sa_; - LiveIntervals &lis_; - VirtRegMap &vrm_; - MachineRegisterInfo &mri_; - const TargetInstrInfo &tii_; - - /// curli_ - The immutable interval we are currently splitting. - const LiveInterval *const curli_; - - /// dupli_ - Created as a copy of curli_, ranges are carved out as new - /// intervals get added through openIntv / closeIntv. This is used to avoid - /// editing curli_. - LiveInterval *dupli_; - - /// Currently open LiveInterval. - LiveInterval *openli_; - - /// createInterval - Create a new virtual register and LiveInterval with same - /// register class and spill slot as curli. - LiveInterval *createInterval(); - - /// getDupLI - Ensure dupli is created and return it. - LiveInterval *getDupLI(); - - /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1 - /// mappings, and do not include values created by inserted copies. - DenseMap<const VNInfo*, VNInfo*> valueMap_; - - /// mapValue - Return the openIntv value that corresponds to the given curli - /// value. - VNInfo *mapValue(const VNInfo *curliVNI); - - /// A dupli value is live through openIntv. - bool liveThrough_; - - /// All the new intervals created for this split are added to intervals_. - SmallVectorImpl<LiveInterval*> &intervals_; - - /// The index into intervals_ of the first interval we added. There may be - /// others from before we got it. - unsigned firstInterval; - - /// Insert a COPY instruction curli -> li. Allocate a new value from li - /// defined by the COPY - VNInfo *insertCopy(LiveInterval &LI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); + SplitAnalysis &SA; + LiveIntervals &LIS; + VirtRegMap &VRM; + MachineRegisterInfo &MRI; + MachineDominatorTree &MDT; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + + /// Edit - The current parent register and new intervals created. + LiveRangeEdit &Edit; + + /// Index into Edit of the currently open interval. + /// The index 0 is used for the complement, so the first interval started by + /// openIntv will be 1. + unsigned OpenIdx; + + typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; + + /// Allocator for the interval map. This will eventually be shared with + /// SlotIndexes and LiveIntervals. + RegAssignMap::Allocator Allocator; + + /// RegAssign - Map of the assigned register indexes. + /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at + /// Idx. + RegAssignMap RegAssign; + + /// LIMappers - One LiveIntervalMap or each interval in Edit. + SmallVector<LiveIntervalMap, 4> LIMappers; + + /// defFromParent - Define Reg from ParentVNI at UseIdx using either + /// rematerialization or a COPY from parent. Return the new value. + VNInfo *defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + + /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. + void rewriteAssigned(); + + /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq + /// classes in ConEQ. + /// This must be done when Intvs[0] is styill live at all uses, before calling + /// ConEq.Distribute(). 
+ void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs, + const ConnectedVNInfoEqClasses &ConEq); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - SmallVectorImpl<LiveInterval*> &newIntervals); + MachineDominatorTree&, LiveRangeEdit&); /// getAnalysis - Get the corresponding analysis. - SplitAnalysis &getAnalysis() { return sa_; } + SplitAnalysis &getAnalysis() { return SA; } /// Create a new virtual register and live interval. void openIntv(); - /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is - /// not live before Idx, a COPY is not inserted. - void enterIntvBefore(SlotIndex Idx); + /// enterIntvBefore - Enter the open interval before the instruction at Idx. + /// If the parent interval is not live before Idx, a COPY is not inserted. + /// Return the beginning of the new live range. + SlotIndex enterIntvBefore(SlotIndex Idx); - /// enterIntvAtEnd - Enter openli at the end of MBB. - /// PhiMBB is a successor inside openli where a PHI value is created. - /// Currently, all entries must share the same PhiMBB. - void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB); + /// enterIntvAtEnd - Enter the open interval at the end of MBB. + /// Use the open interval from he inserted copy to the MBB end. + /// Return the beginning of the new live range. + SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB); - /// useIntv - indicate that all instructions in MBB should use openli. + /// useIntv - indicate that all instructions in MBB should use OpenLI. void useIntv(const MachineBasicBlock &MBB); - /// useIntv - indicate that all instructions in range should use openli. + /// useIntv - indicate that all instructions in range should use OpenLI. void useIntv(SlotIndex Start, SlotIndex End); - /// leaveIntvAfter - Leave openli after the instruction at Idx. - void leaveIntvAfter(SlotIndex Idx); + /// leaveIntvAfter - Leave the open interval after the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvBefore - Leave the open interval before the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvBefore(SlotIndex Idx); /// leaveIntvAtTop - Leave the interval at the top of MBB. - /// Currently, only one value can leave the interval. - void leaveIntvAtTop(MachineBasicBlock &MBB); + /// Add liveness from the MBB top to the copy. + /// Return the end of the live range. + SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB); + + /// overlapIntv - Indicate that all instructions in range should use the open + /// interval, but also let the complement interval be live. + /// + /// This doubles the register pressure, but is sometimes required to deal with + /// register uses after the last valid split point. + /// + /// The Start index should be a return value from a leaveIntv* call, and End + /// should be in the same basic block. The parent interval must have the same + /// value across the range. + /// + void overlapIntv(SlotIndex Start, SlotIndex End); /// closeIntv - Indicate that we are done editing the currently open /// LiveInterval, and ranges can be trimmed. void closeIntv(); - /// rewrite - after all the new live ranges have been created, rewrite - /// instructions using curli to use the new intervals. 
- void rewrite(); + /// finish - after all the new live ranges have been created, compute the + /// remaining live range, and rewrite instructions to use the new registers. + void finish(); - // ===--- High level methods ---=== + /// dump - print the current interval maping to dbgs(). + void dump() const; - /// splitAroundLoop - Split curli into a separate live interval inside - /// the loop. Return true if curli has been completely replaced, false if - /// curli is still intact, and needs to be spilled or split further. - bool splitAroundLoop(const MachineLoop*); + // ===--- High level methods ---=== - /// splitSingleBlocks - Split curli into a separate live interval inside each - /// basic block in Blocks. Return true if curli has been completely replaced, - /// false if curli is still intact, and needs to be spilled or split further. - bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each + /// basic block in Blocks. + void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); - /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return - /// true if curli has been completely replaced, false if curli is still - /// intact, and needs to be spilled or split further. - bool splitInsideBlock(const MachineBasicBlock *); + /// splitInsideBlock - Split CurLI into multiple intervals inside MBB. + void splitInsideBlock(const MachineBasicBlock *); }; } diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 38f3b1f4d35e..08aee82b8c5c 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -29,8 +29,14 @@ using namespace llvm; char LoopSplitter::ID = 0; -INITIALIZE_PASS(LoopSplitter, "loop-splitting", - "Split virtual regists across loop boundaries.", false, false); +INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting", + "Split virtual regists across loop boundaries.", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LoopSplitter, "loop-splitting", + "Split virtual regists across loop boundaries.", false, false) namespace llvm { @@ -140,7 +146,6 @@ namespace llvm { VNInfo *newVal = getNewVNI(preHeaderRange->valno); newVal->def = copyDefIdx; newVal->setCopy(copy); - newVal->setIsDefAccurate(true); li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); getNewLI()->addRange(LiveRange(copyDefIdx, @@ -174,13 +179,13 @@ namespace llvm { // Blow away output range definition. 
outRange->valno->def = ls.lis->getInvalidIndex(); - outRange->valno->setIsDefAccurate(false); li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); + SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); + assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); VNInfo *newVal = - getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock), - true), - 0, false, ls.lis->getVNInfoAllocator()); + getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx, newVal)); @@ -514,8 +519,10 @@ namespace llvm { if (!insertRange) continue; - VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader), - 0, false, lis->getVNInfoAllocator()); + SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), lis->getMBBEndIdx(preHeader), newVal)); @@ -612,8 +619,11 @@ namespace llvm { lis->getMBBEndIdx(splitBlock), true); } } else if (intersects) { - VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock), - 0, false, lis->getVNInfoAllocator()); + SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, + lis->getVNInfoAllocator()); li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), lis->getMBBEndIdx(splitBlock), newVal)); diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h index a726a7b834fb..9fb1b8b30139 100644 --- a/lib/CodeGen/Splitter.h +++ b/lib/CodeGen/Splitter.h @@ -36,7 +36,9 @@ namespace llvm { public: static char ID; - LoopSplitter() : MachineFunctionPass(ID) {} + LoopSplitter() : MachineFunctionPass(ID) { + initializeLoopSplitterPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &au) const; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 9f51778da756..fcaee4208ba3 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Attributes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -45,6 +46,8 @@ namespace { Function *F; Module *M; + DominatorTree* DT; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -62,9 +65,17 @@ namespace { bool RequiresStackProtector() const; public: static char ID; // Pass identification, replacement for typeid. 
- StackProtector() : FunctionPass(ID), TLI(0) {} + StackProtector() : FunctionPass(ID), TLI(0) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } StackProtector(const TargetLowering *tli) - : FunctionPass(ID), TLI(tli) {} + : FunctionPass(ID), TLI(tli) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); + } virtual bool runOnFunction(Function &Fn); }; @@ -72,7 +83,7 @@ namespace { char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", - "Insert stack protectors", false, false); + "Insert stack protectors", false, false) FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { return new StackProtector(tli); @@ -81,6 +92,7 @@ FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); + DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; @@ -135,6 +147,7 @@ bool StackProtector::RequiresStackProtector() const { /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = 0; // The basic block to jump to if check fails. + BasicBlock *FailBBDom = 0; // FailBB's dominator. AllocaInst *AI = 0; // Place on stack that stores the stack guard. Value *StackGuardVar = 0; // The stack guard variable. @@ -178,6 +191,8 @@ bool StackProtector::InsertStackProtectors() { // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); + if (DT) + FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0; } // For each block with a return instruction, convert this: @@ -204,6 +219,10 @@ bool StackProtector::InsertStackProtectors() { // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + if (DT) { + DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0); + FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB); + } // Remove default branch instruction to the new BB. BB->getTerminator()->eraseFromParent(); @@ -223,6 +242,9 @@ bool StackProtector::InsertStackProtectors() { // statements in the function. 
if (!FailBB) return false; + if (DT) + DT->addNewBlock(FailBB, FailBBDom); + return true; } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 8d57ae95dde2..01f5b5627f4f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -95,9 +95,13 @@ namespace { public: static char ID; // Pass identification StackSlotColoring() : - MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } StackSlotColoring(bool RegColor) : - MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {} + MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -145,8 +149,14 @@ namespace { char StackSlotColoring::ID = 0; -INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring", - "Stack Slot Coloring", false, false); +INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) { return new StackSlotColoring(RegColor); @@ -208,7 +218,7 @@ void StackSlotColoring::InitializeSlots() { for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); - int FI = li.getStackSlotIndex(); + int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; SSIntervals.push_back(&li); @@ -251,7 +261,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping, DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); if (!UsedColors[SS] || li->weight < 20) // If the weight is < 20, i.e. two references in a loop with depth 1, // don't bother with it. @@ -340,7 +350,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); - int FI = li->getStackSlotIndex(); + int FI = TargetRegisterInfo::stackSlot2Index(li->reg); DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple @@ -369,7 +379,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); int NewSS = ColorSlot(li); assert(NewSS >= 0 && "Stack coloring failed?"); SlotMapping[SS] = NewSS; @@ -382,7 +392,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; - int SS = li->getStackSlotIndex(); + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); li->weight = SlotWeights[SS]; } // Sort them by new weight. 
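Editor's note: the StackSlotColoring hunks above replace li.getStackSlotIndex() with TargetRegisterInfo::stackSlot2Index(li.reg), that is, the frame index of a spill-slot interval is now recovered from the interval's register field. The following is a minimal sketch of that encoding idea only; the names in namespace toy and the boundary constant are made up for illustration and are not LLVM's actual implementation.

#include <cassert>

namespace toy {
// Hypothetical reserved range: values at or above this boundary are treated
// as frame indexes rather than virtual or physical register numbers.
const unsigned FirstStackSlot = 1u << 30;

inline bool isStackSlot(unsigned Reg) { return Reg >= FirstStackSlot; }

inline unsigned index2StackSlot(int FI) {
  assert(FI >= 0 && "sketch only handles non-negative frame indexes");
  return FirstStackSlot + unsigned(FI);
}

inline int stackSlot2Index(unsigned Reg) {
  assert(isStackSlot(Reg) && "not a stack slot");
  return int(Reg - FirstStackSlot);
}
} // namespace toy

// Round trip: toy::stackSlot2Index(toy::index2StackSlot(5)) == 5.

Packing the frame index into the same unsigned field that normally holds a register is what lets LiveStacks reuse the LiveInterval machinery for stack slots.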
@@ -636,7 +646,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } else { SmallVector<MachineInstr*, 4> NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - Success = Success; // Silence compiler warning. + (void)Success; // Silence compiler warning. assert(Success && "Failed to unfold!"); MachineInstr *NewMI = NewMIs[0]; MBB->insert(MI, NewMI); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 894dbfa28bac..ec7829ec39fe 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -1,4 +1,4 @@ -//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===// +//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// // // The LLVM Compiler Infrastructure // @@ -7,1039 +7,823 @@ // //===----------------------------------------------------------------------===// // -// This pass eliminates machine instruction PHI nodes by inserting copy -// instructions, using an intelligent copy-folding technique based on -// dominator information. This is technique is derived from: +// This pass eliminates PHI instructions by aggressively coalescing the copies +// that would be inserted by a naive algorithm and only inserting the copies +// that are necessary. The coalescing technique initially assumes that all +// registers appearing in a PHI instruction do not interfere. It then eliminates +// proven interferences, using dominators to only perform a linear number of +// interference tests instead of the quadratic number of interference tests +// that this would naively require. This is a technique derived from: // // Budimlic, et al. Fast copy coalescing and live-range identification. // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). // PLDI '02. ACM, New York, NY, 25-32. -// DOI= http://doi.acm.org/10.1145/512529.512534 +// +// The original implementation constructs a data structure they call a dominance +// forest for this purpose. The dominance forest was shown to be unnecessary, +// as it is possible to emulate the creation and traversal of a dominance forest +// by directly using the dominator tree, rather than actually constructing the +// dominance forest. This technique is explained in: +// +// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code +// Quality and Efficiency, +// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code +// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). +// CGO '09. IEEE, Washington, DC, 114-125. +// +// Careful implementation allows for all of the dominator forest interference +// checks to be performed at once in a single depth-first traversal of the +// dominator tree, which is what is implemented here. 
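Editor's note: the rewritten file comment above describes testing interferences within a congruence class by walking the dominator tree once and comparing each definition only against its nearest dominating definition from the same class. The sketch below illustrates that single-traversal idea with toy data structures and a caller-supplied liveness oracle; it is a simplification under assumed names (DomNode, findInterferences, LiveAt), not the pass's actual types.

#include <functional>
#include <map>
#include <utility>
#include <vector>

struct DomNode {
  int Block;                      // basic block id
  std::vector<int> Defs;          // registers defined here, in program order
  std::vector<DomNode*> Children; // dominator-tree children
};

// ClassOf maps a register to its congruence class; LiveAt(Reg, Block) is a
// liveness oracle. Returns pairs of same-class registers that interfere.
std::vector<std::pair<int, int>> findInterferences(
    DomNode *Root, const std::map<int, int> &ClassOf,
    const std::function<bool(int, int)> &LiveAt) {
  std::vector<std::pair<int, int>> Conflicts;
  // For each class, the stack of definitions that dominate the current node.
  std::map<int, std::vector<int>> DomStack;

  std::function<void(DomNode*)> Visit = [&](DomNode *N) {
    std::map<int, unsigned> SavedDepth; // stack depth before this node's pushes
    for (int Reg : N->Defs) {
      int C = ClassOf.at(Reg);
      std::vector<int> &Stack = DomStack[C];
      if (!SavedDepth.count(C))
        SavedDepth[C] = Stack.size();
      // Only the nearest dominating same-class definition needs checking; if
      // it is still live here, the two registers interfere.
      if (!Stack.empty() && LiveAt(Stack.back(), N->Block))
        Conflicts.push_back({Stack.back(), Reg});
      Stack.push_back(Reg);
    }
    for (DomNode *Child : N->Children)
      Visit(Child);
    for (const auto &Entry : SavedDepth) // pop definitions local to this subtree
      DomStack[Entry.first].resize(Entry.second);
  };
  Visit(Root);
  return Conflicts;
}

Because each definition is compared against one stack top instead of every other class member, the number of liveness queries stays linear in the number of definitions, which is the point the comment above is making.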
// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "strongphielim" +#include "PHIEliminationUtils.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; namespace { - struct StrongPHIElimination : public MachineFunctionPass { + class StrongPHIElimination : public MachineFunctionPass { + public: static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(ID) {} - - // Waiting stores, for each MBB, the set of copies that need to - // be inserted into that MBB - DenseMap<MachineBasicBlock*, - std::multimap<unsigned, unsigned> > Waiting; - - // Stacks holds the renaming stack for each register - std::map<unsigned, std::vector<unsigned> > Stacks; - - // Registers in UsedByAnother are PHI nodes that are themselves - // used as operands to another PHI node - std::set<unsigned> UsedByAnother; - - // RenameSets are the is a map from a PHI-defined register - // to the input registers to be coalesced along with the - // predecessor block for those input registers. - std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets; - - // PhiValueNumber holds the ID numbers of the VNs for each phi that we're - // eliminating, indexed by the register defined by that phi. - std::map<unsigned, unsigned> PhiValueNumber; - - // Store the DFS-in number of each block - DenseMap<MachineBasicBlock*, unsigned> preorder; - - // Store the DFS-out number of each block - DenseMap<MachineBasicBlock*, unsigned> maxpreorder; - - bool runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<SlotIndexes>(); - AU.addPreserved<SlotIndexes>(); - AU.addRequired<LiveIntervals>(); - - // TODO: Actually make this true. - AU.addPreserved<LiveIntervals>(); - AU.addPreserved<RegisterCoalescer>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - preorder.clear(); - maxpreorder.clear(); - - Waiting.clear(); - Stacks.clear(); - UsedByAnother.clear(); - RenameSets.clear(); + StrongPHIElimination() : MachineFunctionPass(ID) { + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + private: - - /// DomForestNode - Represents a node in the "dominator forest". This is - /// a forest in which the nodes represent registers and the edges - /// represent a dominance relation in the block defining those registers. 
- struct DomForestNode { - private: - // Store references to our children - std::vector<DomForestNode*> children; - // The register we represent - unsigned reg; - - // Add another node as our child - void addChild(DomForestNode* DFN) { children.push_back(DFN); } - - public: - typedef std::vector<DomForestNode*>::iterator iterator; - - // Create a DomForestNode by providing the register it represents, and - // the node to be its parent. The virtual root node has register 0 - // and a null parent. - DomForestNode(unsigned r, DomForestNode* parent) : reg(r) { - if (parent) - parent->addChild(this); - } - - ~DomForestNode() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; - } - - /// getReg - Return the regiser that this node represents - inline unsigned getReg() { return reg; } - - // Provide iterator access to our children - inline DomForestNode::iterator begin() { return children.begin(); } - inline DomForestNode::iterator end() { return children.end(); } + /// This struct represents a single node in the union-find data structure + /// representing the variable congruence classes. There is one difference + /// from a normal union-find data structure. We steal two bits from the parent + /// pointer . One of these bits is used to represent whether the register + /// itself has been isolated, and the other is used to represent whether the + /// PHI with that register as its destination has been isolated. + /// + /// Note that this leads to the strange situation where the leader of a + /// congruence class may no longer logically be a member, due to being + /// isolated. + struct Node { + enum Flags { + kRegisterIsolatedFlag = 1, + kPHIIsolatedFlag = 2 + }; + Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } + + Node *getLeader(); + + PointerIntPair<Node*, 2> parent; + unsigned value; + unsigned rank; }; - - void computeDFS(MachineFunction& MF); - void processBlock(MachineBasicBlock* MBB); - - std::vector<DomForestNode*> computeDomForest( - std::map<unsigned, MachineBasicBlock*>& instrs, - MachineRegisterInfo& MRI); - void processPHIUnion(MachineInstr* Inst, - std::map<unsigned, MachineBasicBlock*>& PHIUnion, - std::vector<StrongPHIElimination::DomForestNode*>& DF, - std::vector<std::pair<unsigned, unsigned> >& locals); - void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed); - void InsertCopies(MachineDomTreeNode* MBB, - SmallPtrSet<MachineBasicBlock*, 16>& v); - bool mergeLiveIntervals(unsigned primary, unsigned secondary); - }; -} -char StrongPHIElimination::ID = 0; -INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false); + /// Add a register in a new congruence class containing only itself. + void addReg(unsigned); -char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + /// Join the congruence classes of two registers. This function is biased + /// towards the left argument, i.e. after + /// + /// addReg(r2); + /// unionRegs(r1, r2); + /// + /// the leader of the unioned congruence class is the same as the leader of + /// r1's congruence class prior to the union. This is actually relied upon + /// in the copy insertion code. + void unionRegs(unsigned, unsigned); -/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree -/// of the given MachineFunction. These numbers are then used in other parts -/// of the PHI elimination process. 
-void StrongPHIElimination::computeDFS(MachineFunction& MF) { - SmallPtrSet<MachineDomTreeNode*, 8> frontier; - SmallPtrSet<MachineDomTreeNode*, 8> visited; - - unsigned time = 0; - - MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>(); - - MachineDomTreeNode* node = DT.getRootNode(); - - std::vector<MachineDomTreeNode*> worklist; - worklist.push_back(node); - - while (!worklist.empty()) { - MachineDomTreeNode* currNode = worklist.back(); - - if (!frontier.count(currNode)) { - frontier.insert(currNode); - ++time; - preorder.insert(std::make_pair(currNode->getBlock(), time)); - } - - bool inserted = false; - for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end(); - I != E; ++I) - if (!frontier.count(*I) && !visited.count(*I)) { - worklist.push_back(*I); - inserted = true; - break; - } - - if (!inserted) { - frontier.erase(currNode); - visited.insert(currNode); - maxpreorder.insert(std::make_pair(currNode->getBlock(), time)); - - worklist.pop_back(); + /// Get the color of a register. The color is 0 if the register has been + /// isolated. + unsigned getRegColor(unsigned); + + // Isolate a register. + void isolateReg(unsigned); + + /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been + /// isolated. Otherwise, it is the original color of its destination and + /// all of its operands (before they were isolated, if they were). + unsigned getPHIColor(MachineInstr*); + + /// Isolate a PHI. + void isolatePHI(MachineInstr*); + + /// Traverses a basic block, splitting any interferences found between + /// registers in the same congruence class. It takes two DenseMaps as + /// arguments that it also updates: CurrentDominatingParent, which maps + /// a color to the register in that congruence class whose definition was + /// most recently seen, and ImmediateDominatingParent, which maps a register + /// to the register in the same congruence class that most immediately + /// dominates it. + /// + /// This function assumes that it is being called in a depth-first traversal + /// of the dominator tree. + void SplitInterferencesForBasicBlock( + MachineBasicBlock&, + DenseMap<unsigned, unsigned> &CurrentDominatingParent, + DenseMap<unsigned, unsigned> &ImmediateDominatingParent); + + // Lowers a PHI instruction, inserting copies of the source and destination + // registers as necessary. + void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); + + // Merges the live interval of Reg into NewReg and renames Reg to NewReg + // everywhere that Reg appears. Requires Reg and NewReg to have non- + // overlapping lifetimes. + void MergeLIsAndRename(unsigned Reg, unsigned NewReg); + + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + MachineDominatorTree *DT; + LiveIntervals *LI; + + BumpPtrAllocator Allocator; + + DenseMap<unsigned, Node*> RegNodeMap; + + // Maps a basic block to a list of its defs of registers that appear as PHI + // sources. + DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs; + + // Maps a color to a pair of a MachineInstr* and a virtual register, which + // is the operand of that PHI corresponding to the current basic block. + DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor; + + // FIXME: Can these two data structures be combined? Would a std::multimap + // be any better? + + // Stores pairs of predecessor basic blocks and the source registers of + // inserted copy instructions. 
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet; + SrcCopySet InsertedSrcCopySet; + + // Maps pairs of predecessor basic blocks and colors to their defining copy + // instructions. + typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*> + SrcCopyMap; + SrcCopyMap InsertedSrcCopyMap; + + // Maps inserted destination copy registers to their defining copy + // instructions. + typedef DenseMap<unsigned, MachineInstr*> DestCopyMap; + DestCopyMap InsertedDestCopies; + }; + + struct MIIndexCompare { + MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } + + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); } - } -} -namespace { + LiveIntervals *LI; + }; +} // namespace -/// PreorderSorter - a helper class that is used to sort registers -/// according to the preorder number of their defining blocks -class PreorderSorter { -private: - DenseMap<MachineBasicBlock*, unsigned>& preorder; - MachineRegisterInfo& MRI; - -public: - PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p, - MachineRegisterInfo& M) : preorder(p), MRI(M) { } - - bool operator()(unsigned A, unsigned B) { - if (A == B) - return false; - - MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent(); - MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent(); - - if (preorder[ABlock] < preorder[BBlock]) - return true; - else if (preorder[ABlock] > preorder[BBlock]) - return false; - - return false; - } -}; +STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); +STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); +STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); +char StrongPHIElimination::ID = 0; +INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) + +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + +void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); } -/// computeDomForest - compute the subforest of the DomTree corresponding -/// to the defining blocks of the registers in question -std::vector<StrongPHIElimination::DomForestNode*> -StrongPHIElimination::computeDomForest( - std::map<unsigned, MachineBasicBlock*>& regs, - MachineRegisterInfo& MRI) { - // Begin by creating a virtual root node, since the actual results - // may well be a forest. Assume this node has maximum DFS-out number. 
- DomForestNode* VirtualRoot = new DomForestNode(0, 0); - maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL)); - - // Populate a worklist with the registers - std::vector<unsigned> worklist; - worklist.reserve(regs.size()); - for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(), - E = regs.end(); I != E; ++I) - worklist.push_back(I->first); - - // Sort the registers by the DFS-in number of their defining block - PreorderSorter PS(preorder, MRI); - std::sort(worklist.begin(), worklist.end(), PS); - - // Create a "current parent" stack, and put the virtual root on top of it - DomForestNode* CurrentParent = VirtualRoot; - std::vector<DomForestNode*> stack; - stack.push_back(VirtualRoot); - - // Iterate over all the registers in the previously computed order - for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end(); - I != E; ++I) { - unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()]; - MachineBasicBlock* parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; - - // If the DFS-in number of the register is greater than the DFS-out number - // of the current parent, repeatedly pop the parent stack until it isn't. - while (pre > maxpreorder[parentBlock]) { - stack.pop_back(); - CurrentParent = stack.back(); - - parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; +static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { + // FIXME: This only needs to check from the first terminator, as only the + // first terminator can use a virtual register. + for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { + assert (RI != MBB->rend()); + MachineInstr *MI = &*RI; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &MO = *OI; + if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) + return &MO; } - - // Now that we've found the appropriate parent, create a DomForestNode for - // this register and attach it to the forest - DomForestNode* child = new DomForestNode(*I, CurrentParent); - - // Push this new node on the "current parent" stack - stack.push_back(child); - CurrentParent = child; } - - // Return a vector containing the children of the virtual root node - std::vector<DomForestNode*> ret; - ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end()); - return ret; + return NULL; } -/// isLiveIn - helper method that determines, from a regno, if a register -/// is live into a block -static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - LiveInterval& I = LI.getOrCreateInterval(r); - SlotIndex idx = LI.getMBBStartIdx(MBB); - return I.liveAt(idx); -} +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TII = MF.getTarget().getInstrInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + LI = &getAnalysis<LiveIntervals>(); -/// isLiveOut - help method that determines, from a regno, if a register is -/// live out of a block. 
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), - E = MBB->succ_end(); PI != E; ++PI) - if (isLiveIn(r, *PI, LI)) - return true; - - return false; -} + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + PHISrcDefs[I].push_back(BBI); -/// interferes - checks for local interferences by scanning a block. The only -/// trick parameter is 'mode' which tells it the relationship of the two -/// registers. 0 - defined in the same block, 1 - first properly dominates -/// second, 2 - second properly dominates first -static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan, - LiveIntervals& LV, unsigned mode) { - MachineInstr* def = 0; - MachineInstr* kill = 0; - - // The code is still in SSA form at this point, so there is only one - // definition per VReg. Thus we can safely use MRI->getVRegDef(). - const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo(); - - bool interference = false; - - // Wallk the block, checking for interferences - for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end(); - MBI != MBE; ++MBI) { - MachineInstr* curr = MBI; - - // Same defining block... - if (mode == 0) { - if (curr == MRI->getVRegDef(a)) { - // If we find our first definition, save it - if (!def) { - def = curr; - // If there's already an unkilled definition, then - // this is an interference - } else if (!kill) { - interference = true; - break; - // If there's a definition followed by a KillInst, then - // they can't interfere - } else { - interference = false; - break; - } - // Symmetric with the above - } else if (curr == MRI->getVRegDef(b)) { - if (!def) { - def = curr; - } else if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - // Store KillInsts if they match up with the definition - } else if (curr->killsRegister(a)) { - if (def == MRI->getVRegDef(a)) { - kill = curr; - } else if (curr->killsRegister(b)) { - if (def == MRI->getVRegDef(b)) { - kill = curr; - } - } - } - // First properly dominates second... - } else if (mode == 1) { - if (curr == MRI->getVRegDef(b)) { - // Definition of second without kill of first is an interference - if (!kill) { - interference = true; - break; - // Definition after a kill is a non-interference - } else { - interference = false; - break; - } - // Save KillInsts of First - } else if (curr->killsRegister(a)) { - kill = curr; - } - // Symmetric with the above - } else if (mode == 2) { - if (curr == MRI->getVRegDef(a)) { - if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - } else if (curr->killsRegister(b)) { - kill = curr; + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = BBI->getOperand(i); + unsigned SrcReg = SrcMO.getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + PHISrcDefs[DefMI->getParent()].push_back(DefMI); } } } - - return interference; -} -/// processBlock - Determine how to break up PHIs in the current block. Each -/// PHI is broken up by some combination of renaming its operands and inserting -/// copies. This method is responsible for determining which operands receive -/// which treatment. 
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - - // Holds names that have been added to a set in any PHI within this block - // before the current one. - std::set<unsigned> ProcessedNames; - - // Iterate over all the PHI nodes in this block - MachineBasicBlock::iterator P = MBB->begin(); - while (P != MBB->end() && P->isPHI()) { - unsigned DestReg = P->getOperand(0).getReg(); - - // Don't both doing PHI elimination for dead PHI's. - if (P->registerDefIsDead(DestReg)) { - ++P; - continue; - } + // Perform a depth-first traversal of the dominator tree, splitting + // interferences amongst PHI-congruence classes. + DenseMap<unsigned, unsigned> CurrentDominatingParent; + DenseMap<unsigned, unsigned> ImmediateDominatingParent; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + SplitInterferencesForBasicBlock(*DI->getBlock(), + CurrentDominatingParent, + ImmediateDominatingParent); + } - LiveInterval& PI = LI.getOrCreateInterval(DestReg); - SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex(); - VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; - PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); - - // PHIUnion is the set of incoming registers to the PHI node that - // are going to be renames rather than having copies inserted. This set - // is refinded over the course of this function. UnionedBlocks is the set - // of corresponding MBBs. - std::map<unsigned, MachineBasicBlock*> PHIUnion; - SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks; - - // Iterate over the operands of the PHI node - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - unsigned SrcReg = P->getOperand(i-1).getReg(); - - // Don't need to try to coalesce a register with itself. - if (SrcReg == DestReg) { - ProcessedNames.insert(SrcReg); - continue; - } - - // We don't need to insert copies for implicit_defs. - MachineInstr* DefMI = MRI.getVRegDef(SrcReg); - if (DefMI->isImplicitDef()) - ProcessedNames.insert(SrcReg); - - // Check for trivial interferences via liveness information, allowing us - // to avoid extra work later. Any registers that interfere cannot both - // be in the renaming set, so choose one and add copies for it instead. 
- // The conditions are: - // 1) if the operand is live into the PHI node's block OR - // 2) if the PHI node is live out of the operand's defining block OR - // 3) if the operand is itself a PHI node and the original PHI is - // live into the operand's defining block OR - // 4) if the operand is already being renamed for another PHI node - // in this block OR - // 5) if any two operands are defined in the same block, insert copies - // for one of them - if (isLiveIn(SrcReg, P->getParent(), LI) || - isLiveOut(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) || - ( MRI.getVRegDef(SrcReg)->isPHI() && - isLiveIn(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) ) || - ProcessedNames.count(SrcReg) || - UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { - - // Add a copy for the selected register - MachineBasicBlock* From = P->getOperand(i).getMBB(); - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - } else { - // Otherwise, add it to the renaming set - PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); - UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); - } + // Insert copies for all PHI source and destination registers. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + InsertCopiesForPHI(BBI, I); } - - // Compute the dominator forest for the renaming set. This is a forest - // where the nodes are the registers and the edges represent dominance - // relations between the defining blocks of the registers - std::vector<StrongPHIElimination::DomForestNode*> DF = - computeDomForest(PHIUnion, MRI); - - // Walk DomForest to resolve interferences at an inter-block level. This - // will remove registers from the renaming set (and insert copies for them) - // if interferences are found. - std::vector<std::pair<unsigned, unsigned> > localInterferences; - processPHIUnion(P, PHIUnion, DF, localInterferences); - - // If one of the inputs is defined in the same block as the current PHI - // then we need to check for a local interference between that input and - // the PHI. - for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) - localInterferences.push_back(std::make_pair(I->first, - P->getOperand(0).getReg())); - - // The dominator forest walk may have returned some register pairs whose - // interference cannot be determined from dominator analysis. We now - // examine these pairs for local interferences. 
- for (std::vector<std::pair<unsigned, unsigned> >::iterator I = - localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { - std::pair<unsigned, unsigned> p = *I; - - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - - // Determine the block we need to scan and the relationship between - // the two registers - MachineBasicBlock* scan = 0; - unsigned mode = 0; - if (MRI.getVRegDef(p.first)->getParent() == - MRI.getVRegDef(p.second)->getParent()) { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 0; // Same block - } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), - MRI.getVRegDef(p.second)->getParent())) { - scan = MRI.getVRegDef(p.second)->getParent(); - mode = 1; // First dominates second - } else { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 2; // Second dominates first - } - - // If there's an interference, we need to insert copies - if (interferes(p.first, p.second, scan, LI, mode)) { - // Insert copies for First - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - if (P->getOperand(i-1).getReg() == p.first) { - unsigned SrcReg = p.first; - MachineBasicBlock* From = P->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, - P->getOperand(0).getReg())); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } + } + + // FIXME: Preserve the equivalence classes during copy insertion and use + // the preversed equivalence classes instead of recomputing them. + RegNodeMap.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + unsigned SrcReg = BBI->getOperand(i).getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); } } - - // Add the renaming set for this PHI node to our overall renaming information - for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(), - QE = PHIUnion.end(); QI != QE; ++QI) { - DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> " - << P->getOperand(0).getReg() << "\n"); - } - - RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); - - // Remember which registers are already renamed, so that we don't try to - // rename them for another PHI node in this block - for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - ProcessedNames.insert(I->first); - - ++P; } -} -/// processPHIUnion - Take a set of candidate registers to be coalesced when -/// decomposing the PHI instruction. Use the DominanceForest to remove the ones -/// that are known to interfere, and flag others that need to be checked for -/// local interferences. 
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, - std::map<unsigned, MachineBasicBlock*>& PHIUnion, - std::vector<StrongPHIElimination::DomForestNode*>& DF, - std::vector<std::pair<unsigned, unsigned> >& locals) { - - std::vector<DomForestNode*> worklist(DF.begin(), DF.end()); - SmallPtrSet<DomForestNode*, 4> visited; - - // Code is still in SSA form, so we can use MRI::getVRegDef() - MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - unsigned DestReg = Inst->getOperand(0).getReg(); - - // DF walk on the DomForest - while (!worklist.empty()) { - DomForestNode* DFNode = worklist.back(); - - visited.insert(DFNode); - - bool inserted = false; - for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); - CI != CE; ++CI) { - DomForestNode* child = *CI; - - // If the current node is live-out of the defining block of one of its - // children, insert a copy for it. NOTE: The paper actually calls for - // a more elaborate heuristic for determining whether to insert copies - // for the child or the parent. In the interest of simplicity, we're - // just always choosing the parent. - if (isLiveOut(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI)) { - // Insert copies for parent - for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) { - if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) { - unsigned SrcReg = DFNode->getReg(); - MachineBasicBlock* From = Inst->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } - - // If a node is live-in to the defining block of one of its children, but - // not live-out, then we need to scan that block for local interferences. - } else if (isLiveIn(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI) || - MRI.getVRegDef(DFNode->getReg())->getParent() == - MRI.getVRegDef(child->getReg())->getParent()) { - // Add (p, c) to possible local interferences - locals.push_back(std::make_pair(DFNode->getReg(), child->getReg())); + DenseMap<unsigned, unsigned> RegRenamingMap; + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + while (BBI != BBE && BBI->isPHI()) { + MachineInstr *PHI = BBI; + + assert(PHI->getNumOperands() > 0); + + unsigned SrcReg = PHI->getOperand(1).getReg(); + unsigned SrcColor = getRegColor(SrcReg); + unsigned NewReg = RegRenamingMap[SrcColor]; + if (!NewReg) { + NewReg = SrcReg; + RegRenamingMap[SrcColor] = SrcReg; } - - if (!visited.count(child)) { - worklist.push_back(child); - inserted = true; + MergeLIsAndRename(SrcReg, NewReg); + + unsigned DestReg = PHI->getOperand(0).getReg(); + if (!InsertedDestCopies.count(DestReg)) + MergeLIsAndRename(DestReg, NewReg); + + for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { + unsigned SrcReg = PHI->getOperand(i).getReg(); + MergeLIsAndRename(SrcReg, NewReg); } + + ++BBI; + LI->RemoveMachineInstrFromMaps(PHI); + PHI->eraseFromParent(); + Changed = true; } - - if (!inserted) worklist.pop_back(); } -} -/// ScheduleCopies - Insert copies into predecessor blocks, scheduling -/// them properly so as to avoid the 'lost copy' and the 'virtual swap' -/// problems. -/// -/// Based on "Practical Improvements to the Construction and Destruction -/// of Static Single Assignment Form" by Briggs, et al. 
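For context on the two hazards the removed ScheduleCopies comment refers to: the copies a PHI expands into in a predecessor are logically parallel, so emitting them in an arbitrary order can clobber a value that another copy still needs (the swap problem), and a copy whose source stays live out of the block must have its value saved first (the lost-copy problem, which the removed code handled by copying into a temporary when isLiveOut held). A tiny host-side C++ illustration of the swap case, purely for exposition:

  #include <cassert>

  int main() {
    // Two PHIs in a shared successor request, in parallel: a' = b and b' = a.
    int a = 1, b = 2;

    // Emitting "a = b; b = a;" naively would leave a == b == 2 and lose the
    // original value of a. Scheduling the copies through a temporary breaks
    // the cycle, which is what the Briggs-style algorithm automates:
    int t = a;
    a = b;
    b = t;

    assert(a == 2 && b == 1);
    return 0;
  }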
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, - std::set<unsigned>& pushed) { - // FIXME: This function needs to update LiveIntervals - std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB]; - - std::multimap<unsigned, unsigned> worklist; - std::map<unsigned, unsigned> map; - - // Setup worklist of initial copies - for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) { - map.insert(std::make_pair(I->first, I->first)); - map.insert(std::make_pair(I->second, I->second)); - - if (!UsedByAnother.count(I->second)) { - worklist.insert(*I); - - // Avoid iterator invalidation - std::multimap<unsigned, unsigned>::iterator OI = I; - ++I; - copy_set.erase(OI); - } else { - ++I; + // Due to the insertion of copies to split live ranges, the live intervals are + // guaranteed to not overlap, except in one case: an original PHI source and a + // PHI destination copy. In this case, they have the same value and thus don't + // truly intersect, so we merge them into the value live at that point. + // FIXME: Is there some better way we can handle this? + for (DestCopyMap::iterator I = InsertedDestCopies.begin(), + E = InsertedDestCopies.end(); I != E; ++I) { + unsigned DestReg = I->first; + unsigned DestColor = getRegColor(DestReg); + unsigned NewReg = RegRenamingMap[DestColor]; + + LiveInterval &DestLI = LI->getInterval(DestReg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + assert(DestLI.ranges.size() == 1 + && "PHI destination copy's live interval should be a single live " + "range from the beginning of the BB to the copy instruction."); + LiveRange *DestLR = DestLI.begin(); + VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); + if (!NewVNI) { + NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); + MachineInstr *CopyInstr = I->second; + CopyInstr->getOperand(1).setIsKill(true); } + + LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); + NewLI.addRange(NewLR); + + LI->removeInterval(DestReg); + MRI->replaceRegWith(DestReg, NewReg); } - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - MachineFunction* MF = MBB->getParent(); - MachineRegisterInfo& MRI = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests; - - // Iterate over the worklist, inserting copies - while (!worklist.empty() || !copy_set.empty()) { - while (!worklist.empty()) { - std::multimap<unsigned, unsigned>::iterator WI = worklist.begin(); - std::pair<unsigned, unsigned> curr = *WI; - worklist.erase(WI); - - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); - - if (isLiveOut(curr.second, MBB, LI)) { - // Create a temporary - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - - // Insert copy from curr.second to a temporary at - // the Phi defining curr.second - MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), - t).addReg(curr.second); - DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t - << "\n"); - - // Push temporary on Stacks - Stacks[curr.second].push_back(t); - - // Insert curr.second in pushed - pushed.insert(curr.second); - - // Create a live interval for this temporary - InsertedPHIDests.push_back(std::make_pair(t, --PI)); - } - - // Insert copy from map[curr.first] to curr.second - BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), 
curr.second).addReg(map[curr.first]); - map[curr.first] = curr.second; - DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " - << curr.second << "\n"); - - // Push this copy onto InsertedPHICopies so we can - // update LiveIntervals with it. - MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); - InsertedPHIDests.push_back(std::make_pair(curr.second, --MI)); - - // If curr.first is a destination in copy_set... - for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) - if (curr.first == I->second) { - std::pair<unsigned, unsigned> temp = *I; - worklist.insert(temp); - - // Avoid iterator invalidation - std::multimap<unsigned, unsigned>::iterator OI = I; - ++I; - copy_set.erase(OI); - - break; - } else { - ++I; - } - } - - if (!copy_set.empty()) { - std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin(); - std::pair<unsigned, unsigned> curr = *CI; - worklist.insert(curr); - copy_set.erase(CI); - - LiveInterval& I = LI.getInterval(curr.second); - MachineBasicBlock::iterator term = MBB->getFirstTerminator(); - SlotIndex endIdx = SlotIndex(); - if (term != MBB->end()) - endIdx = LI.getInstructionIndex(term); - else - endIdx = LI.getMBBEndIdx(MBB); - - if (I.liveAt(endIdx)) { - const TargetRegisterClass *RC = - MF->getRegInfo().getRegClass(curr.first); - - // Insert a copy from dest to a new temporary t at the end of b - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), t).addReg(curr.second); - map[curr.second] = t; - - MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); - InsertedPHIDests.push_back(std::make_pair(t, --TI)); + + // Adjust the live intervals of all PHI source registers to handle the case + // where the PHIs in successor blocks were the only later uses of the source + // register. + for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), + E = InsertedSrcCopySet.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->first; + unsigned SrcReg = I->second; + if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) + SrcReg = RenamedRegister; + + LiveInterval &SrcLI = LI->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { + isLiveOut = true; + break; } } + + if (isLiveOut) + continue; + + MachineOperand *LastUse = findLastUse(MBB, SrcReg); + assert(LastUse); + SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); + SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + LastUse->setIsKill(true); } - - // Renumber the instructions so that we can perform the index computations - // needed to create new live intervals. - LI.renumber(); - - // For copies that we inserted at the ends of predecessors, we construct - // live intervals. This is pretty easy, since we know that the destination - // register cannot have be in live at that point previously. We just have - // to make sure that, for registers that serve as inputs to more than one - // PHI, we don't create multiple overlapping live intervals. 
- std::set<unsigned> RegHandled; - for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I = - InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { - if (RegHandled.insert(I->first).second) { - LiveInterval& Int = LI.getOrCreateInterval(I->first); - SlotIndex instrIdx = LI.getInstructionIndex(I->second); - if (Int.liveAt(instrIdx.getDefIndex())) - Int.removeRange(instrIdx.getDefIndex(), - LI.getMBBEndIdx(I->second->getParent()).getNextSlot(), - true); - - LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); - R.valno->setCopy(I->second); - R.valno->def = LI.getInstructionIndex(I->second).getDefIndex(); - } + + LI->renumber(); + + Allocator.Reset(); + RegNodeMap.clear(); + PHISrcDefs.clear(); + InsertedSrcCopySet.clear(); + InsertedSrcCopyMap.clear(); + InsertedDestCopies.clear(); + + return Changed; +} + +void StrongPHIElimination::addReg(unsigned Reg) { + if (RegNodeMap.count(Reg)) + return; + RegNodeMap[Reg] = new (Allocator) Node(Reg); +} + +StrongPHIElimination::Node* +StrongPHIElimination::Node::getLeader() { + Node *N = this; + Node *Parent = parent.getPointer(); + Node *Grandparent = Parent->parent.getPointer(); + + while (Parent != Grandparent) { + N->parent.setPointer(Grandparent); + N = Grandparent; + Parent = Parent->parent.getPointer(); + Grandparent = Parent->parent.getPointer(); } + + return Parent; } -/// InsertCopies - insert copies into MBB and all of its successors -void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, - SmallPtrSet<MachineBasicBlock*, 16>& visited) { - MachineBasicBlock* MBB = MDTN->getBlock(); - visited.insert(MBB); - - std::set<unsigned> pushed; - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - // Rewrite register uses from Stacks - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (I->isPHI()) - continue; - - for (unsigned i = 0; i < I->getNumOperands(); ++i) - if (I->getOperand(i).isReg() && - Stacks[I->getOperand(i).getReg()].size()) { - // Remove the live range for the old vreg. - LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); - LiveInterval::iterator OldLR = - OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex()); - if (OldLR != OldInt.end()) - OldInt.removeRange(*OldLR, true); - - // Change the register - I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); - - // Add a live range for the new vreg - LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); - VNInfo* FirstVN = *Int.vni_begin(); - FirstVN->setHasPHIKill(false); - LiveRange LR (LI.getMBBStartIdx(I->getParent()), - LI.getInstructionIndex(I).getUseIndex().getNextSlot(), - FirstVN); - - Int.addRange(LR); - } - } - - // Schedule the copies for this block - ScheduleCopies(MBB, pushed); - - // Recur down the dominator tree. 
- for (MachineDomTreeNode::iterator I = MDTN->begin(), - E = MDTN->end(); I != E; ++I) - if (!visited.count((*I)->getBlock())) - InsertCopies(*I, visited); - - // As we exit this block, pop the names we pushed while processing it - for (std::set<unsigned>::iterator I = pushed.begin(), - E = pushed.end(); I != E; ++I) - Stacks[*I].pop_back(); +unsigned StrongPHIElimination::getRegColor(unsigned Reg) { + DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg); + if (RI == RegNodeMap.end()) + return 0; + Node *Node = RI->second; + if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) + return 0; + return Node->getLeader()->value; } -bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, - unsigned secondary) { - - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - LiveInterval& LHS = LI.getOrCreateInterval(primary); - LiveInterval& RHS = LI.getOrCreateInterval(secondary); - - LI.renumber(); - - DenseMap<VNInfo*, VNInfo*> VNMap; - for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - LiveRange R = *I; - - SlotIndex Start = R.start; - SlotIndex End = R.end; - if (LHS.getLiveRangeContaining(Start)) - return false; - - if (LHS.getLiveRangeContaining(End)) - return false; - - LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R); - if (RI != LHS.end() && RI->start < End) - return false; +void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { + Node *Node1 = RegNodeMap[Reg1]->getLeader(); + Node *Node2 = RegNodeMap[Reg2]->getLeader(); + + if (Node1->rank > Node2->rank) { + Node2->parent.setPointer(Node1->getLeader()); + } else if (Node1->rank < Node2->rank) { + Node1->parent.setPointer(Node2->getLeader()); + } else if (Node1 != Node2) { + Node2->parent.setPointer(Node1->getLeader()); + Node1->rank++; } - - for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - LiveRange R = *I; - VNInfo* OldVN = R.valno; - VNInfo*& NewVN = VNMap[OldVN]; - if (!NewVN) { - NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator()); - } - - LiveRange LR (R.start, R.end, NewVN); - LHS.addRange(LR); +} + +void StrongPHIElimination::isolateReg(unsigned Reg) { + Node *Node = RegNodeMap[Reg]; + Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); +} + +unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { + assert(PHI->isPHI()); + + unsigned DestReg = PHI->getOperand(0).getReg(); + Node *DestNode = RegNodeMap[DestReg]; + if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) + return 0; + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); + if (SrcColor) + return SrcColor; } - - LI.removeInterval(RHS.reg); - - return true; + return 0; } -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { - LiveIntervals& LI = getAnalysis<LiveIntervals>(); - - // Compute DFS numbers of each block - computeDFS(Fn); - - // Determine which phi node operands need copies - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && I->begin()->isPHI()) - processBlock(I); - - // Break interferences where two different phis want to coalesce - // in the same register. 
- std::set<unsigned> seen; - typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > - RenameSetType; - for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); - I != E; ++I) { - for (std::map<unsigned, MachineBasicBlock*>::iterator - OI = I->second.begin(), OE = I->second.end(); OI != OE; ) { - if (!seen.count(OI->first)) { - seen.insert(OI->first); - ++OI; +void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { + assert(PHI->isPHI()); + Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; + Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); +} + +/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any +/// interferences found between registers in the same congruence class. It +/// takes two DenseMaps as arguments that it also updates: +/// +/// 1) CurrentDominatingParent, which maps a color to the register in that +/// congruence class whose definition was most recently seen. +/// +/// 2) ImmediateDominatingParent, which maps a register to the register in the +/// same congruence class that most immediately dominates it. +/// +/// This function assumes that it is being called in a depth-first traversal +/// of the dominator tree. +/// +/// The algorithm used here is a generalization of the dominance-based SSA test +/// for two variables. If there are variables a_1, ..., a_n such that +/// +/// def(a_1) dom ... dom def(a_n), +/// +/// then we can test for an interference between any two a_i by only using O(n) +/// interference tests between pairs of variables. If i < j and a_i and a_j +/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). +/// Thus, in order to test for an interference involving a_i, we need only check +/// for a potential interference with a_i+1. +/// +/// This method can be generalized to arbitrary sets of variables by performing +/// a depth-first traversal of the dominator tree. As we traverse down a branch +/// of the dominator tree, we keep track of the current dominating variable and +/// only perform an interference test with that variable. However, when we go to +/// another branch of the dominator tree, the definition of the current dominating +/// variable may no longer dominate the current block. In order to correct this, +/// we need to use a stack of past choices of the current dominating variable +/// and pop from this stack until we find a variable whose definition actually +/// dominates the current block. +/// +/// There will be one push on this stack for each variable that has become the +/// current dominating variable, so instead of using an explicit stack we can +/// simply associate the previous choice for a current dominating variable with +/// the new choice. This works better in our implementation, where we test for +/// interference in multiple distinct sets at once. +void +StrongPHIElimination::SplitInterferencesForBasicBlock( + MachineBasicBlock &MBB, + DenseMap<unsigned, unsigned> &CurrentDominatingParent, + DenseMap<unsigned, unsigned> &ImmediateDominatingParent) { + // Sort defs by their order in the original basic block, as the code below + // assumes that it is processing definitions in dominance order. 
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB]; + std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); + + for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(), + BBE = DefInstrs.end(); BBI != BBE; ++BBI) { + for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), + E = (*BBI)->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + + // FIXME: This would be faster if it were possible to bail out of checking + // an instruction's operands after the explicit defs, but this is incorrect + // for variadic instructions, which may appear before register allocation + // in the future. + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned DestReg = MO.getReg(); + if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // If the virtual register being defined is not used in any PHI or has + // already been isolated, then there are no more interferences to check. + unsigned DestColor = getRegColor(DestReg); + if (!DestColor) + continue; + + // The input to this pass sometimes is not in SSA form in every basic + // block, as some virtual registers have redefinitions. We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. For now, we just handle the situation conservatively + // in a way that will possibly lead to false interferences. + unsigned &CurrentParent = CurrentDominatingParent[DestColor]; + unsigned NewParent = CurrentParent; + if (NewParent == DestReg) + continue; + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + + // If NewParent is nonzero, then its definition dominates the current + // instruction, so it is only necessary to check for the liveness of + // NewParent in order to check for an interference. + if (NewParent + && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { + // If there is an interference, always isolate the new register. This + // could be improved by using a heuristic that decides which of the two + // registers to isolate. + isolateReg(DestReg); + CurrentParent = NewParent; } else { - Waiting[OI->second].insert(std::make_pair(OI->first, I->first)); - unsigned reg = OI->first; - ++OI; - I->second.erase(reg); - DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first - << "\n"); + // If there is no interference, update ImmediateDominatingParent and set + // the CurrentDominatingParent for this color to the current register. 
+ ImmediateDominatingParent[DestReg] = NewParent; + CurrentParent = DestReg; } } } - - // Insert copies - // FIXME: This process should probably preserve LiveIntervals - SmallPtrSet<MachineBasicBlock*, 16> visited; - MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); - InsertCopies(MDT.getRootNode(), visited); - - // Perform renaming - for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); - I != E; ++I) - while (I->second.size()) { - std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin(); - - DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); - - if (SI->first != I->first) { - if (mergeLiveIntervals(I->first, SI->first)) { - Fn.getRegInfo().replaceRegWith(SI->first, I->first); - - if (RenameSets.count(SI->first)) { - I->second.insert(RenameSets[SI->first].begin(), - RenameSets[SI->first].end()); - RenameSets.erase(SI->first); - } - } else { - // Insert a last-minute copy if a conflict was detected. - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); - - LI.renumber(); - - LiveInterval& Int = LI.getOrCreateInterval(I->first); - SlotIndex instrIdx = - LI.getInstructionIndex(--SI->second->getFirstTerminator()); - if (Int.liveAt(instrIdx.getDefIndex())) - Int.removeRange(instrIdx.getDefIndex(), - LI.getMBBEndIdx(SI->second).getNextSlot(), true); - - LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, - --SI->second->getFirstTerminator()); - R.valno->setCopy(--SI->second->getFirstTerminator()); - R.valno->def = instrIdx.getDefIndex(); - - DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> " - << I->first << "\n"); - } + + // We now walk the PHIs in successor blocks and check for interferences. This + // is necesary because the use of a PHI's operands are logically contained in + // the predecessor block. The def of a PHI's destination register is processed + // along with the other defs in a basic block. + + CurrentPHIForColor.clear(); + + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + MachineInstr *PHI = BBI; + + // If a PHI is already isolated, either by being isolated directly or + // having all of its operands isolated, ignore it. + unsigned Color = getPHIColor(PHI); + if (!Color) + continue; + + // Find the index of the PHI operand that corresponds to this basic block. + unsigned PredIndex; + for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { + if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) + break; } - - LiveInterval& Int = LI.getOrCreateInterval(I->first); - const LiveRange* LR = - Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second)); - LR->valno->setHasPHIKill(true); - - I->second.erase(SI->first); + assert(PredIndex < PHI->getNumOperands()); + unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. 
+ unsigned &CurrentParent = CurrentDominatingParent[Color]; + unsigned NewParent = CurrentParent; + while (NewParent + && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + CurrentParent = NewParent; + + // If there is an interference with a register, always isolate the + // register rather than the PHI. It is also possible to isolate the + // PHI, but that introduces copies for all of the registers involved + // in that PHI. + if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) + && NewParent != PredOperandReg) + isolateReg(NewParent); + + std::pair<MachineInstr*, unsigned> + &CurrentPHI = CurrentPHIForColor[Color]; + + // If two PHIs have the same operand from every shared predecessor, then + // they don't actually interfere. Otherwise, isolate the current PHI. This + // could possibly be improved, e.g. we could isolate the PHI with the + // fewest operands. + if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHI = std::make_pair(PHI, PredOperandReg); } - - // Remove PHIs - std::vector<MachineInstr*> phis; - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); - BI != BE; ++BI) - if (BI->isPHI()) - phis.push_back(BI); } - - for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end(); - I != E; ) { - MachineInstr* PInstr = *(I++); - - // If this is a dead PHI node, then remove it from LiveIntervals. - unsigned DestReg = PInstr->getOperand(0).getReg(); - LiveInterval& PI = LI.getInterval(DestReg); - if (PInstr->registerDefIsDead(DestReg)) { - if (PI.containsOneValue()) { - LI.removeInterval(DestReg); +} + +void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, + MachineBasicBlock *MBB) { + assert(PHI->isPHI()); + ++NumPHIsLowered; + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval &SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); + assert(SrcVNI); + SrcVNI->setHasPHIKill(true); + continue; + } + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? 
I->second->getOperand(0).getReg() : 0; + } + + if (!CopyReg) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr *CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + ++NumSrcCopiesInserted; + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); } else { - SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); - PI.removeRange(*PI.getLiveRangeContaining(idx), true); - } - } else { - // Trim live intervals of input registers. They are no longer live into - // this block if they died after the PHI. If they lived after it, don't - // trim them because they might have other legitimate uses. - for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { - unsigned reg = PInstr->getOperand(i).getReg(); - - MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); - LiveInterval& InputI = LI.getInterval(reg); - if (MBB != PInstr->getParent() && - InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && - InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex())) - InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), - LI.getInstructionIndex(PInstr), - true); + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); } - - // If the PHI is not dead, then the valno defined by the PHI - // now has an unknown def. - SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex(); - const LiveRange* PLR = PI.getLiveRangeContaining(idx); - PLR->valno->setIsPHIDef(true); - LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), - PLR->start, PLR->valno); - PI.addRange(R); + + if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) + InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; } - - LI.RemoveMachineInstrFromMaps(PInstr); - PInstr->eraseFromParent(); + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? + LiveInterval &SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); } - - LI.renumber(); - - return true; + + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval &DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. 
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + assert(DestVNI); + DestVNI->setIsPHIDef(true); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getDefIndex(), + DestVNI)); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(DestReg); + unsigned CopyReg = MRI->createVirtualRegister(RC); + + MachineInstr *CopyInstr = BuildMI(*MBB, + MBB->SkipPHIsAndLabels(MBB->begin()), + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + DestReg).addReg(CopyReg); + LI->InsertMachineInstrInMaps(CopyInstr); + PHI->getOperand(0).setReg(CopyReg); + ++NumDestCopiesInserted; + + // Add the region from the beginning of MBB to the copy instruction to + // CopyReg's live interval, and give the VNInfo the phidef flag. + LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); + VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, + CopyInstr, + LI->getVNInfoAllocator()); + CopyVNI->setIsPHIDef(true); + CopyLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getDefIndex(), + CopyVNI)); + + // Adjust DestReg's live interval to adjust for its new definition at + // CopyInstr. + LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + assert(DestVNI); + DestVNI->def = DestCopyIndex.getDefIndex(); + + InsertedDestCopies[CopyReg] = CopyInstr; +} + +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; + + LiveInterval &OldLI = LI->getInterval(Reg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + // Merge the live ranges of the two registers. + DenseMap<VNInfo*, VNInfo*> VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo *OldVN = OldLR.valno; + + VNInfo *&NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } + + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); + } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. 
+ LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index a815b364d54e..04d3d311b416 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -350,7 +350,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isDef()) { const TargetRegisterClass *RC = MRI->getRegClass(Reg); @@ -459,15 +459,19 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + if (TailDuplicateSize.getNumOccurrences() == 0 && + MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; if (PreRegAlloc) { - // Pre-regalloc tail duplication hurts compile time and doesn't help - // much except for indirect branches. - if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch()) + if (TailBB->empty()) + return false; + const TargetInstrDesc &TID = TailBB->back().getDesc(); + // Pre-regalloc tail duplication hurts compile time and doesn't help + // much except for indirect branches and returns. + if (!TID.isIndirectBranch() && !TID.isReturn()) return false; // If the target has hardware branch prediction that can handle indirect // branches, duplicating them can often make them predictable when there @@ -500,9 +504,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } - // Heuristically, don't tail-duplicate calls if it would expand code size, - // as it's less likely to be worth the extra cost. - if (InstrCount > 1 && HasCall) + // Don't tail-duplicate calls before register allocation. Calls presents a + // barrier to register allocation so duplicating them may end up increasing + // spills. + if (InstrCount > 1 && (PreRegAlloc && HasCall)) return false; DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 6e4a0d837ecd..15340a3f1084 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -22,13 +22,18 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt<bool> DisableHazardRecognizer( + "disable-sched-hazard", cl::Hidden, cl::init(false), + cl::desc("Disable hazard detection during preRA scheduling")); + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. 
void @@ -135,7 +140,7 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isPredicable()) return false; - + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { if (TID.OpInfo[i].isPredicate()) { MachineOperand &MO = MI->getOperand(i); @@ -166,8 +171,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } -bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const { +bool +TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } @@ -252,9 +259,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), - Flags, /*Offset=*/0, - MFI.getObjectSize(FI), + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); @@ -329,8 +336,13 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); // Avoid instructions obviously unsafe for remat. - if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || - TID.mayStore()) + if (TID.isNotDuplicable() || TID.mayStore() || + MI->hasUnmodeledSideEffects()) + return false; + + // Don't remat inline asm. We have no idea how expensive it is + // even if it's side effect free. + if (MI->isInlineAsm()) return false; // Avoid instructions which load from potentially varying memory. @@ -414,8 +426,24 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, return false; } +// Provide a global flag for disabling the PreRA hazard recognizer that targets +// may choose to honor. +bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const { + return !DisableHazardRecognizer; +} + +// Default implementation of CreateTargetRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + // Default implementation of CreateTargetPostRAHazardRecognizer. 
ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { - return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f1e10eec724c..0b7bd98cc692 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -29,10 +29,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" using namespace llvm; using namespace dwarf; @@ -45,81 +47,81 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TargetLoweringObjectFile::Initialize(Ctx, TM); BSSSection = - getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); TextSection = - getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); DataSection = - getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getDataRel()); ReadOnlySection = - getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, SectionKind::getReadOnly()); TLSDataSection = - getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, + getContext().getELFSection(".tdata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, SectionKind::getThreadData()); TLSBSSSection = - getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, + getContext().getELFSection(".tbss", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, SectionKind::getThreadBSS()); DataRelSection = - getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); DataRelLocalSection = - getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRelLocal()); DataRelROSection = - getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getReadOnlyWithRel()); 
DataRelROLocalSection = - getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getReadOnlyWithRelLocal()); MergeableConst4Section = - getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst4()); MergeableConst8Section = - getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst8()); MergeableConst16Section = - getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE, + getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_MERGE, SectionKind::getMergeableConst16()); StaticCtorSection = - getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".ctors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); StaticDtorSection = - getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, + getContext().getELFSection(".dtors", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, SectionKind::getDataRel()); // Exception Handling Sections. @@ -129,50 +131,50 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, // runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. LSDASection = - getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, + getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - // Debug Info Sections. 
DwarfAbbrevSection = - getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfInfoSection = - getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfLineSection = - getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfFrameSection = - getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfPubNamesSection = - getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfPubTypesSection = - getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfStrSection = - getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfLocSection = - getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfARangesSection = - getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfRangesSection = - getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfMacroInfoSection = - getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); } +const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { + return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { @@ -208,18 +210,18 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { static unsigned getELFSectionType(StringRef Name, SectionKind K) { if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; + return ELF::SHT_INIT_ARRAY; if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + return ELF::SHT_FINI_ARRAY; if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; + return ELF::SHT_PREINIT_ARRAY; if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; + return ELF::SHT_NOBITS; - return MCSectionELF::SHT_PROGBITS; + return ELF::SHT_PROGBITS; } @@ -228,24 +230,24 @@ getELFSectionFlags(SectionKind K) { unsigned Flags = 0; if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; + Flags |= ELF::SHF_ALLOC; if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; + Flags |= ELF::SHF_EXECINSTR; if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; + Flags |= ELF::SHF_WRITE; if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; + Flags |= ELF::SHF_TLS; // K.isMergeableConst() is left out to honour PR4650 if (K.isMergeableCString() || K.isMergeableConst4() || 
K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; + Flags |= ELF::SHF_MERGE; if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; + Flags |= ELF::SHF_STRINGS; return Flags; } @@ -261,23 +263,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return getContext().getELFSection(SectionName, getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + getELFSectionFlags(Kind), Kind); } /// getSectionPrefixForGlobal - Return the section prefix name used by options @@ -307,7 +293,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, bool EmitUniquedSection; if (Kind.isText()) EmitUniquedSection = TM.getFunctionSections(); - else + else EmitUniquedSection = TM.getDataSections(); // If this global is linkonce/weak and the target handles this by emitting it @@ -315,19 +301,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if ((GV->isWeakForLinker() || EmitUniquedSection) && !Kind.isCommon() && !Kind.isBSS()) { const char *Prefix; - if (GV->isWeakForLinker()) - Prefix = getSectionPrefixForUniqueGlobal(Kind); - else { - assert(EmitUniquedSection); - Prefix = getSectionPrefixForGlobal(Kind); - } + Prefix = getSectionPrefixForGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); Name.append(Sym->getName().begin(), Sym->getName().end()); + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (GV->isWeakForLinker()) { + Group = Sym->getName(); + Flags |= ELF::SHF_GROUP; + } + return getContext().getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); + Flags, Kind, 0, Group); } if (Kind.isText()) return TextSection; @@ -352,10 +340,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, std::string Name = SizeSpec + utostr(Align); - return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, + return getContext().getELFSection(Name, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | + ELF::SHF_MERGE | + ELF::SHF_STRINGS, Kind); } @@ -450,7 +438,16 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHSymbolGlobal = true; IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - + + Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); + if (T.getOS() == Triple::Darwin) { + unsigned MajNum = T.getDarwinMajorNumber(); + if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger + CommDirectiveSupportsAlignment = false; + if (MajNum > 9) // 10.6 SnowLeopard + IsFunctionEHSymbolGlobal = false; + } + TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection // .text @@ -469,20 +466,20 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, = getContext().getMachOSection("__DATA", "__thread_bss", 
MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, SectionKind::getThreadBSS()); - + // TODO: Verify datarel below. TLSTLVSection // .tlv = getContext().getMachOSection("__DATA", "__thread_vars", MCSectionMachO::S_THREAD_LOCAL_VARIABLES, SectionKind::getDataRel()); - + TLSThreadInitSection = getContext().getMachOSection("__DATA", "__thread_init", MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, SectionKind::getDataRel()); - + CStringSection // .cstring - = getContext().getMachOSection("__TEXT", "__cstring", + = getContext().getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, SectionKind::getMergeable1ByteCString()); UStringSection @@ -493,7 +490,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, MCSectionMachO::S_4BYTE_LITERALS, SectionKind::getMergeableConst4()); EightByteConstantSection // .literal8 - = getContext().getMachOSection("__TEXT", "__literal8", + = getContext().getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, SectionKind::getMergeableConst8()); @@ -517,14 +514,14 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, SectionKind::getText()); ConstTextCoalSection - = getContext().getMachOSection("__TEXT", "__const_coal", + = getContext().getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, SectionKind::getReadOnly()); ConstDataSection // .const_data = getContext().getMachOSection("__DATA", "__const", 0, SectionKind::getReadOnlyWithRel()); DataCoalSection - = getContext().getMachOSection("__DATA","__datacoal_nt", + = getContext().getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, SectionKind::getDataRel()); DataCommonSection @@ -534,7 +531,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, DataBSSSection = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, SectionKind::getBSS()); - + LazySymbolPointerSection = getContext().getMachOSection("__DATA", "__la_symbol_ptr", @@ -566,17 +563,9 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, // Exception Handling. LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); - EHFrameSection = - getContext().getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - // Debug Information. 
DwarfAbbrevSection = - getContext().getMachOSection("__DWARF", "__debug_abbrev", + getContext().getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); DwarfInfoSection = @@ -623,10 +612,19 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, getContext().getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); - + TLSExtraDataSection = TLSTLVSection; } +const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const { + return getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); +} + const MCSection *TargetLoweringObjectFileMachO:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { @@ -665,7 +663,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - + // Handle thread local data. if (Kind.isThreadBSS()) return TLSBSSSection; if (Kind.isThreadData()) return TLSDataSection; @@ -685,7 +683,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isMergeable1ByteCString() && TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) return CStringSection; - + // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. @@ -721,7 +719,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // with the .zerofill directive (aka .lcomm). if (Kind.isBSSLocal()) return DataBSSSection; - + // Otherwise, just drop the variable in the normal data section. return DataSection; } @@ -858,13 +856,6 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - EHFrameSection = - getContext().getCOFFSection(".eh_frame", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); - // Debug info. 
DwarfAbbrevSection = getContext().getCOFFSection(".debug_abbrev", @@ -928,6 +919,15 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, SectionKind::getMetadata()); } +const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { + return getContext().getCOFFSection(".eh_frame", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + + static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; @@ -938,6 +938,7 @@ getCOFFSectionFlags(SectionKind K) { else if (K.isText()) Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_CNT_CODE; else if (K.isBSS ()) Flags |= @@ -967,12 +968,12 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) - return ".text$linkonce"; + return ".text$"; if (Kind.isBSS ()) - return ".bss$linkonce"; + return ".bss$"; if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; + return ".data$"; + return ".rdata$"; } @@ -987,14 +988,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); MCSymbol *Sym = Mang->getSymbol(GV); - Name.append(Sym->getName().begin(), Sym->getName().end()); + Name.append(Sym->getName().begin() + 1, Sym->getName().end()); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); + COFF::IMAGE_COMDAT_SELECT_ANY, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 78989c567e42..b3120b8be1ab 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -110,7 +110,7 @@ namespace { bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist); + unsigned RegA, unsigned RegB, unsigned Dist); typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, @@ -138,7 +138,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - TwoAddressInstructionPass() : MachineFunctionPass(ID) {} + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -146,10 +148,7 @@ namespace { AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); - if (StrongPHIElim) - AU.addPreservedID(StrongPHIEliminationID); - else - AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(PHIEliminationID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -159,8 +158,11 @@ namespace { } char TwoAddressInstructionPass::ID = 0; -INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction", - "Two-Address instruction pass", false, false); +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", 
false, false) char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; @@ -548,8 +550,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, unsigned FromRegC = getMappedReg(regC, SrcRegMap); unsigned ToRegB = getMappedReg(regB, DstRegMap); unsigned ToRegC = getMappedReg(regC, DstRegMap); - if (!regsAreCompatible(FromRegB, ToRegB, TRI) && - (regsAreCompatible(FromRegB, ToRegC, TRI) || + if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) && + ((!FromRegC && !ToRegC) || + regsAreCompatible(FromRegB, ToRegC, TRI) || regsAreCompatible(FromRegC, ToRegB, TRI))) return true; @@ -630,7 +633,8 @@ bool TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned RegB, unsigned Dist) { + unsigned RegA, unsigned RegB, + unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); @@ -650,6 +654,10 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, mi = NewMI; nmi = llvm::next(mi); } + + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); return true; } @@ -740,7 +748,7 @@ static bool isSafeToDelete(MachineInstr *MI, const TargetInstrDesc &TID = MI->getDesc(); if (TID.mayStore() || TID.isCall()) return false; - if (TID.isTerminator() || TID.hasUnmodeledSideEffects()) + if (TID.isTerminator() || MI->hasUnmodeledSideEffects()) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -884,7 +892,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // three-address instruction. Check if it is profitable. if (!regBKilled || isProfitableToConv3Addr(regA)) { // Try to convert it. - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; return true; // Done with this instruction. } @@ -951,7 +959,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (LV) { for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() != 0 && + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { @@ -1013,8 +1021,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs; - ReMatRegs.resize(MRI->getLastVirtReg()+1); + BitVector ReMatRegs(MRI->getNumVirtRegs()); typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > TiedOperandMap; @@ -1143,7 +1150,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(regB); + ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); ++NumReMats; } else { BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), @@ -1229,13 +1236,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } // Some remat'ed instructions are dead. 
- int VReg = ReMatRegs.find_first(); - while (VReg != -1) { + for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(i); if (MRI->use_nodbg_empty(VReg)) { MachineInstr *DefMI = MRI->getVRegDef(VReg); DefMI->eraseFromParent(); } - VReg = ReMatRegs.find_next(VReg); } // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve @@ -1346,7 +1352,6 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, continue; // Insert a copy to replace the original. - MachineBasicBlock::iterator InsertLoc = SomeMI; MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, SomeMI->getDebugLoc(), TII->get(TargetOpcode::COPY)) @@ -1412,6 +1417,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); if (MI->getOperand(i).getSubReg() || TargetRegisterInfo::isPhysicalRegister(SrcReg)) { DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); @@ -1431,7 +1437,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { bool isKill = MI->getOperand(i).isKill(); if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || - !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || + !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), + MRI->getRegClass(SrcReg), SubIdx)) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source is live-in the block. We don't want // to end up with a partial-redef of a livein, e.g. @@ -1460,7 +1468,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineBasicBlock::iterator InsertLoc = MI; MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm()) + .addReg(DstReg, RegState::Define, SubIdx) .addReg(SrcReg, getKillRegState(isKill)); MI->getOperand(i).setReg(0); if (LV && isKill) diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 6dd333358bc4..48d8ab1658da 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,6 +26,7 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,16 +44,19 @@ namespace { virtual bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - UnreachableBlockElim() : FunctionPass(ID) {} + UnreachableBlockElim() : FunctionPass(ID) { + initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); } }; } char UnreachableBlockElim::ID = 0; INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", - "Remove unreachable blocks from the CFG", false, false); + "Remove unreachable blocks from the CFG", false, false) FunctionPass *llvm::createUnreachableBlockEliminationPass() { return new UnreachableBlockElim(); @@ -106,7 +110,7 @@ namespace { char UnreachableMachineBlockElim::ID = 0; INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", - "Remove 
unreachable machine basic blocks", false, false); + "Remove unreachable machine basic blocks", false, false) char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; @@ -118,6 +122,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet<MachineBasicBlock*, 8> Reachable; + bool ModifiedPHI = false; MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); @@ -179,6 +184,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (!preds.count(phi->getOperand(i).getMBB())) { phi->RemoveOperand(i); phi->RemoveOperand(i-1); + ModifiedPHI = true; } if (phi->getNumOperands() == 3) { @@ -188,6 +194,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineInstr* temp = phi; ++phi; temp->eraseFromParent(); + ModifiedPHI = true; if (Input != Output) F.getRegInfo().replaceRegWith(Output, Input); @@ -201,5 +208,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { F.RenumberBlocks(); - return DeadBlocks.size(); + return (DeadBlocks.size() || ModifiedPHI); } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 20ffcffa70d3..734b87e62f62 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -48,7 +49,7 @@ STATISTIC(NumSpills , "Number of register spills"); char VirtRegMap::ID = 0; -INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false); +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); @@ -74,8 +75,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { EmergencySpillSlots.clear(); SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1- - TargetRegisterInfo::FirstVirtualRegister); + ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); allocatableRCRegs.clear(); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), @@ -89,24 +89,37 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { } void VirtRegMap::grow() { - unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); - Virt2PhysMap.grow(LastVirtReg); - Virt2StackSlotMap.grow(LastVirtReg); - Virt2ReMatIdMap.grow(LastVirtReg); - Virt2SplitMap.grow(LastVirtReg); - Virt2SplitKillMap.grow(LastVirtReg); - ReMatMap.grow(LastVirtReg); - ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1); + unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); + Virt2PhysMap.resize(NumRegs); + Virt2StackSlotMap.resize(NumRegs); + Virt2ReMatIdMap.resize(NumRegs); + Virt2SplitMap.resize(NumRegs); + Virt2SplitKillMap.resize(NumRegs); + ReMatMap.resize(NumRegs); + ImplicitDefed.resize(NumRegs); +} + +unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + 
assert(SS >= LowSpillSlot && "Unexpected low spill slot"); + unsigned Idx = SS-LowSpillSlot; + while (Idx >= SpillSlotToUsesMap.size()) + SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + return SS; } unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg); unsigned physReg = Hint.second; - if (physReg && - TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) physReg = getPhys(physReg); if (Hint.first == 0) - return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + return (TargetRegisterInfo::isPhysicalRegister(physReg)) ? physReg : 0; return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); } @@ -116,18 +129,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && "attempt to assign stack slot to already spilled register"); const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - unsigned Idx = SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); - Virt2StackSlotMap[virtReg] = SS; ++NumSpills; - return SS; + return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); } void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { @@ -160,14 +163,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { EmergencySpillSlots.find(RC); if (I != EmergencySpillSlots.end()) return I->second; - int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - EmergencySpillSlots[RC] = SS; - return SS; + return EmergencySpillSlots[RC] = createSpillSlot(RC); } void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { @@ -232,10 +228,11 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { UnusedRegs.resize(NumRegs); BitVector Used(NumRegs); - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[i]); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[Reg]); + } BitVector Allocatable = TRI->getAllocatableSet(*MF); bool AnyUnused = false; @@ -258,23 +255,97 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { return AnyUnused; } +void VirtRegMap::rewrite(SlotIndexes *Indexes) { + DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " + << MF->getFunction()->getName() << '\n'); + + SmallVector<unsigned, 8> SuperKills; + + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + DEBUG(MBBI->print(dbgs(), Indexes)); + for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); + MII != MIE;) { + MachineInstr *MI = MII; + ++MII; + + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + 
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned VirtReg = MO.getReg(); + unsigned PhysReg = getPhys(VirtReg); + assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + + // Preserve semantics of sub-register operands. + if (MO.getSubReg()) { + // A virtual register kill refers to the whole register, so we may + // have to add <imp-use,kill> operands for the super-register. + if (MO.isUse() && MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + + // We don't have to deal with sub-register defs because + // LiveIntervalAnalysis already added the necessary <imp-def> + // operands. + + // PhysReg operands cannot have subregister indexes. + PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); + assert(PhysReg && "Invalid SubReg for physical register"); + MO.setSubReg(0); + } + // Rewrite. Note we could have used MachineOperand::substPhysReg(), but + // we need the inlining here. + MO.setReg(PhysReg); + } + + // Add any missing super-register kills after rewriting the whole + // instruction. + while (!SuperKills.empty()) + MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + + DEBUG(dbgs() << "> " << *MI); + + // Finally, remove any identity copies. + if (MI->isIdentityCopy()) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } + } + } + + // Tell MRI about physical registers in use. + for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) + if (!MRI->reg_nodbg_empty(Reg)) + MRI->setPhysRegUsed(Reg); +} + void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); const MachineRegisterInfo &MRI = MF->getRegInfo(); OS << "********** REGISTER MAP **********\n"; - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { - if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) - OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) - << "] " << MRI.getRegClass(i)->getName() << "\n"; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << MRI.getRegClass(Reg)->getName() << "\n"; + } } - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) - OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] - << "] " << MRI.getRegClass(i)->getName() << "\n"; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << MRI.getRegClass(Reg)->getName() << "\n"; + } + } OS << '\n'; } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 8b6082d18193..ba50f4e42302 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -35,6 +35,7 @@ namespace llvm { class TargetInstrInfo; class TargetRegisterInfo; class raw_ostream; + class SlotIndexes; class VirtRegMap : public MachineFunctionPass { public: @@ -80,7 +81,7 @@ namespace llvm { /// 
Virt2SplitKillMap - This is splitted virtual register to its last use /// (kill) index mapping. - IndexedMap<SlotIndex> Virt2SplitKillMap; + IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap; /// ReMatMap - This is virtual register to re-materialized instruction /// mapping. Each virtual register whose definition is going to be @@ -134,6 +135,9 @@ namespace llvm { /// UnusedRegs - A list of physical registers that have not been used. BitVector UnusedRegs; + /// createSpillSlot - Allocate a spill slot for RC from MFI. + unsigned createSpillSlot(const TargetRegisterClass *RC); + VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT void operator=(const VirtRegMap&); // DO NOT IMPLEMENT @@ -153,10 +157,13 @@ namespace llvm { } MachineFunction &getMachineFunction() const { - assert(MF && "getMachineFunction called before runOnMAchineFunction"); + assert(MF && "getMachineFunction called before runOnMachineFunction"); return *MF; } + MachineRegisterInfo &getRegInfo() const { return *MRI; } + const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; } + void grow(); /// @brief returns true if the specified virtual register is @@ -207,10 +214,19 @@ namespace llvm { } /// @brief returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) { + unsigned getPreSplitReg(unsigned virtReg) const { return Virt2SplitMap[virtReg]; } + /// getOriginal - Return the original virtual register that VirtReg descends + /// from through splitting. + /// A register that was not created by splitting is its own original. + /// This operation is idempotent. + unsigned getOriginal(unsigned VirtReg) const { + unsigned Orig = getPreSplitReg(VirtReg); + return Orig ? Orig : VirtReg; + } + /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { @@ -426,12 +442,12 @@ namespace llvm { /// @brief Mark the specified register as being implicitly defined. void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister); + ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); } /// @brief Returns true if the virtual register is implicitly defined. bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister]; + return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; } /// @brief Updates information about the specified virtual register's value @@ -487,6 +503,13 @@ namespace llvm { return 0; } + /// rewrite - Rewrite all instructions in MF to use only physical registers + /// by mapping all virtual register operands to their assigned physical + /// registers. + /// + /// @param Indexes Optionally remove deleted instructions from indexes. 
+    void rewrite(SlotIndexes *Indexes);
+
     void print(raw_ostream &OS, const Module* M = 0) const;
     void dump() const;
   };
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 240d28cf3011..458a2134bf4a 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include <algorithm>
 using namespace llvm;
 STATISTIC(NumDSE , "Number of dead stores elided");
@@ -216,7 +216,8 @@ public:
              << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
     else
       DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
-    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+          << (CanClobber ? " canclobber" : "") << "\n");
   }
   /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -297,7 +298,7 @@ ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
   const TargetLowering *TL = MF.getTarget().getTargetLowering();
   if (!TL->isTypeLegal(TL->getPointerTy()))
-    // Believe it or not, this is true on PIC16.
+    // Believe it or not, this is true on 16-bit targets like PIC16.
     return InsertLoc;
   const TargetRegisterClass *ptrRegClass =
@@ -462,25 +463,70 @@ static void findSinglePredSuccessor(MachineBasicBlock *MBB,
   }
 }
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
-                           const TargetRegisterInfo* TRI,
-                           BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps) {
-  if (RegKills[Reg]) {
-    KillOps[Reg]->setIsKill(false);
-    // KillOps[Reg] might be a def of a super-register.
-    unsigned KReg = KillOps[Reg]->getReg();
-    KillOps[KReg] = NULL;
-    RegKills.reset(KReg);
-    for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
-      if (RegKills[*SR]) {
-        KillOps[*SR]->setIsKill(false);
-        KillOps[*SR] = NULL;
-        RegKills.reset(*SR);
-      }
-    }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+  MachineOperand *KillOp = KillOps[Reg];
+  KillOp->setIsKill(false);
+  // KillOps[Reg] might be a def of a super-register.
+  unsigned KReg = KillOp->getReg();
+  if (!RegKills[KReg])
+    return;
+
+  assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+  KillOps[KReg] = NULL;
+  RegKills.reset(KReg);
+
+  // If it's a def of a super-register, its other sub-registers are no
+  // longer killed as well.
+  for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+    DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+    assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+    KillOps[*SR] = NULL;
+    RegKills.reset(*SR);
+  }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI.
Note that this +/// must be done even if this MI is being deleted! Consider: +/// +/// USE $r1 (vreg1) <kill> +/// ... +/// $r1(vreg3) = COPY $r1 (vreg2) +/// +/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially +/// vreg1's only use is a kill. The rewriter doesn't know it should be live +/// until it rewrites vreg2. At that points it sees that the copy is dead and +/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 +/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at +/// vreg1 before deleting the copy. +static void ResurrectKill(MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo* TRI, BitVector &RegKills, + std::vector<MachineOperand*> &KillOps) { + if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { + ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); + return; + } + // No previous kill for this reg. Check for subreg kills as well. + // d4 = + // store d4, fi#0 + // ... + // = s8<kill> + // ... + // = d4 <avoiding reload> + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { + unsigned SReg = *SR; + if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) + ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); } } @@ -502,15 +548,22 @@ static void InvalidateKills(MachineInstr &MI, KillRegs->push_back(Reg); assert(Reg < KillOps.size()); if (KillOps[Reg] == &MO) { + // This operand was the kill, now no longer. KillOps[Reg] = NULL; RegKills.reset(Reg); for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { if (RegKills[*SR]) { + assert(KillOps[*SR] == &MO && "bad subreg kill flags"); KillOps[*SR] = NULL; RegKills.reset(*SR); } } } + else { + // This operand may have reused a previously killed reg. Keep it live in + // case it continues to be used after erasing this instruction. + ResurrectKill(MI, Reg, TRI, RegKills, KillOps); + } } } @@ -578,44 +631,8 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, if (Reg == 0) continue; - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - // That can't be right. Register is killed but not re-defined and it's - // being reused. Let's fix that. - KillOps[Reg]->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOps[Reg]->getReg(); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // Must be a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } else { - // Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... - // = s8<kill> - // ... - // = d4 <avoiding reload> - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) { - KillOps[SReg]->setIsKill(false); - unsigned KReg = KillOps[SReg]->getReg(); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) { - KillOps[*SSR] = NULL; - RegKills.reset(*SSR); - } - } - } - } + // This operand may have reused a previously killed reg. Keep it live. 
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps); if (MO.isKill()) { RegKills.set(Reg); @@ -770,7 +787,8 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, NotAvailable.insert(Reg); else { MBB.addLiveIn(Reg); - InvalidateKill(Reg, TRI, RegKills, KillOps); + if (RegKills[Reg]) + ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); } // Skip over the same register. @@ -1056,6 +1074,7 @@ class LocalRewriter : public VirtRegRewriter { const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; VirtRegMap *VRM; + LiveIntervals *LIs; BitVector AllocatableRegs; DenseMap<MachineInstr*, unsigned> DistanceMap; DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues; @@ -1068,6 +1087,11 @@ public: LiveIntervals* LIs); private: + void EraseInstr(MachineInstr *MI) { + VRM->RemoveMachineInstrFromMaps(MI); + LIs->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } bool OptimizeByUnfold2(unsigned VirtReg, int SS, MachineBasicBlock::iterator &MII, @@ -1110,6 +1134,12 @@ private: bool InsertSpills(MachineInstr *MI); + void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + BitVector &RegKills, + ReuseInfo &ReusedOperands, + std::vector<MachineOperand*> &KillOps); + void RewriteMBB(LiveIntervals *LIs, AvailableSpills &Spills, BitVector &RegKills, std::vector<MachineOperand*> &KillOps); @@ -1117,17 +1147,18 @@ private: } bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* LIs) { + LiveIntervals* lis) { MRI = &MF.getRegInfo(); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); VRM = &vrm; + LIs = lis; AllocatableRegs = TRI->getAllocatableSet(MF); DEBUG(dbgs() << "\n**** Local spiller rewriting function '" << MF.getFunction()->getName() << "':\n"); DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" " reloads!) ****\n"); - DEBUG(MF.dump()); + DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); // Spills - Keep track of which spilled values are available in physregs // so that we can choose to reuse the physregs instead of emitting @@ -1178,7 +1209,7 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, } DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); + DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); // Mark unused spill slots. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1190,10 +1221,8 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, MFI->RemoveStackObject(SS); for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { MachineInstr *DVMI = DbgValues[j]; - MachineBasicBlock *DVMBB = DVMI->getParent(); DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - VRM->RemoveMachineInstrFromMaps(DVMI); - DVMBB->erase(DVMI); + EraseInstr(DVMI); } ++NumDSS; } @@ -1273,8 +1302,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, VRM->transferRestorePts(&MI, NewMIs[0]); MII = MBB->insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); ++NumModRefUnfold; // Unfold next instructions that fold the same SS. @@ -1289,8 +1317,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS, VRM->transferRestorePts(&NextMI, NewMIs[0]); MBB->insert(NextMII, NewMIs[0]); InvalidateKills(NextMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&NextMI); - MBB->erase(&NextMI); + EraseInstr(&NextMI); ++NumModRefUnfold; // Skip over dbg_value instructions. 
while (NextMII != MBB->end() && NextMII->isDebugValue()) @@ -1417,8 +1444,7 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); MII = FoldedMI; InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); return true; } } @@ -1524,14 +1550,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, // Delete all 3 old instructions. InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(ReloadMI); - MBB->erase(ReloadMI); + EraseInstr(ReloadMI); InvalidateKills(*DefMI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DefMI); - MBB->erase(DefMI); + EraseInstr(DefMI); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); // If NewReg was previously holding value of some SS, it's now clobbered. // This has to be done now because it's a physical register. When this @@ -1574,8 +1597,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, bool CheckDef = PrevMII != MBB->begin(); if (CheckDef) --PrevMII; - VRM->RemoveMachineInstrFromMaps(LastStore); - MBB->erase(LastStore); + EraseInstr(LastStore); if (CheckDef) { // Look at defs of killed registers on the store. Mark the defs // as dead since the store has been deleted and they aren't @@ -1586,8 +1608,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, MachineInstr *DeadDef = PrevMII; if (ReMatDefs.count(DeadDef) && !HasOtherDef) { // FIXME: This assumes a remat def does not have side effects. - VRM->RemoveMachineInstrFromMaps(DeadDef); - MBB->erase(DeadDef); + EraseInstr(DeadDef); ++NumDRM; } } @@ -1612,10 +1633,18 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII, /// effect and all of its defs are dead. static bool isSafeToDelete(MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); - if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() || TID.isCall() || TID.isBarrier() || TID.isReturn() || - TID.hasUnmodeledSideEffects()) + MI.isLabel() || MI.isDebugValue() || + MI.hasUnmodeledSideEffects()) return false; + + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI.isInlineAsm()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.getReg()) @@ -1675,8 +1704,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills, LastUD->setIsDead(); break; } - VRM->RemoveMachineInstrFromMaps(LastUDMI); - MBB->erase(LastUDMI); + EraseInstr(LastUDMI); } else { LastUD->setIsKill(); RegKills.set(Reg); @@ -1764,6 +1792,10 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, << TRI->getName(InReg) << " for vreg" << VirtReg <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); + + // Reusing a physreg may resurrect it. But we expect ProcessUses to update + // the kill flags for the current instruction after processing it. + ++NumOmitted; continue; } else if (InReg && InReg != Phys) { @@ -1828,7 +1860,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, return true; } -/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return +/// InsertSpills - Insert spills after MI if requested by VRM. Return /// true if spills were inserted. 
bool LocalRewriter::InsertSpills(MachineInstr *MI) { if (!VRM->isSpillPt(MI)) @@ -1856,6 +1888,349 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) { } +/// ProcessUses - Process all of MI's spilled operands and all available +/// operands. +void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, + std::vector<MachineInstr*> &MaybeDeadStores, + BitVector &RegKills, + ReuseInfo &ReusedOperands, + std::vector<MachineOperand*> &KillOps) { + // Clear kill info. + SmallSet<unsigned, 2> KilledMIRegs; + SmallVector<unsigned, 4> VirtUseOps; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.getReg() == 0) + continue; // Ignore non-register operands. + + unsigned VirtReg = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { + // Ignore physregs for spilling, but remember that it is used by this + // function. + MRI->setPhysRegUsed(VirtReg); + continue; + } + + // We want to process implicit virtual register uses first. + if (MO.isImplicit()) + // If the virtual register is implicitly defined, emit a implicit_def + // before so scavenger knows it's "defined". + // FIXME: This is a horrible hack done the by register allocator to + // remat a definition with virtual register operand. + VirtUseOps.insert(VirtUseOps.begin(), i); + else + VirtUseOps.push_back(i); + + // A partial def causes problems because the same operand both reads and + // writes the register. This rewriter is designed to rewrite uses and defs + // separately, so a partial def would already have been rewritten to a + // physreg by the time we get to processing defs. + // Add an implicit use operand to model the partial def. + if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && + MI.findRegisterUseOperandIdx(VirtReg) == -1) { + VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); + MI.addOperand(MachineOperand::CreateReg(VirtReg, + false, // isDef + true)); // isImplicit + DEBUG(dbgs() << "Partial redef: " << MI); + } + } + + // Process all of the spilled uses and all non spilled reg references. + SmallVector<int, 2> PotentialDeadStoreSlots; + KilledMIRegs.clear(); + for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { + unsigned i = VirtUseOps[j]; + unsigned VirtReg = MI.getOperand(i).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register?"); + + unsigned SubIdx = MI.getOperand(i).getSubReg(); + if (VRM->isAssignedReg(VirtReg)) { + // This virtual register was assigned a physreg! + unsigned Phys = VRM->getPhys(VirtReg); + MRI->setPhysRegUsed(Phys); + if (MI.getOperand(i).isDef()) + ReusedOperands.markClobbered(Phys); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); + if (VRM->isImplicitlyDefined(VirtReg)) + // FIXME: Is this needed? + BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), Phys); + continue; + } + + // This virtual register is now known to be a spilled value. + if (!MI.getOperand(i).isUse()) + continue; // Handle defs in the loop below (handle use&def here though) + + bool AvoidReload = MI.getOperand(i).isUndef(); + // Check if it is defined by an implicit def. It should not be spilled. + // Note, this is for correctness reason. e.g. + // 8 %reg1024<def> = IMPLICIT_DEF + // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 + // The live range [12, 14) are not part of the r1024 live interval since + // it's defined by an implicit def. It will not conflicts with live + // interval of r1025. 
Now suppose both registers are spilled, you can + // easily see a situation where both registers are reloaded before + // the INSERT_SUBREG and both target registers that would overlap. + bool DoReMat = VRM->isReMaterialized(VirtReg); + int SSorRMId = DoReMat + ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); + int ReuseSlot = SSorRMId; + + // Check to see if this stack slot is available. + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); + + // If this is a sub-register use, make sure the reuse register is in the + // right register class. For example, for x86 not all of the 32-bit + // registers have accessible sub-registers. + // Similarly so for EXTRACT_SUBREG. Consider this: + // EDI = op + // MOV32_mr fi#1, EDI + // ... + // = EXTRACT_SUBREG fi#1 + // fi#1 is available in EDI, but it cannot be reused because it's not in + // the right register file. + if (PhysReg && !AvoidReload && SubIdx) { + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + if (!RC->contains(PhysReg)) + PhysReg = 0; + } + + if (PhysReg && !AvoidReload) { + // This spilled operand might be part of a two-address operand. If this + // is the case, then changing it will necessarily require changing the + // def part of the instruction as well. However, in some cases, we + // aren't allowed to modify the reused register. If none of these cases + // apply, reuse it. + bool CanReuse = true; + bool isTied = MI.isRegTiedToDefOperand(i); + if (isTied) { + // Okay, we have a two address operand. We can reuse this physreg as + // long as we are allowed to clobber the value and there isn't an + // earlier def that has already clobbered the physreg. + CanReuse = !ReusedOperands.isClobbered(PhysReg) && + Spills.canClobberPhysReg(PhysReg); + } + // If this is an asm, and a PhysReg alias is used elsewhere as an + // earlyclobber operand, we can't also use it as an input. + if (MI.isInlineAsm()) { + for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { + MachineOperand &MOk = MI.getOperand(k); + if (MOk.isReg() && MOk.isEarlyClobber() && + TRI->regsOverlap(MOk.getReg(), PhysReg)) { + CanReuse = false; + DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg << ": " << MOk << '\n'); + break; + } + } + } + + if (CanReuse) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DEBUG(dbgs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); + else + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + + // Reusing a physreg may resurrect it. But we expect ProcessUses to + // update the kill flags for the current instr after processing it. + + // The only technical detail we have is that we don't know that + // PhysReg won't be clobbered by a reloaded stack slot that occurs + // later in the instruction. In particular, consider 'op V1, V2'. + // If V1 is available in physreg R0, we would choose to reuse it + // here, instead of reloading it into the register the allocator + // indicated (say R1). However, V2 might have to be reloaded + // later, and it might indicate that it needs to live in R0. 
When + // this occurs, we need to have information available that + // indicates it is safe to use R1 for the reload instead of R0. + // + // To further complicate matters, we might conflict with an alias, + // or R0 and R1 might not be compatible with each other. In this + // case, we actually insert a reload for V1 in R1, ensuring that + // we can get at R0 or its alias. + ReusedOperands.addReuse(i, ReuseSlot, PhysReg, + VRM->getPhys(VirtReg), VirtReg); + if (isTied) + // Only mark it clobbered if this is a use&def operand. + ReusedOperands.markClobbered(PhysReg); + ++NumReused; + + if (MI.getOperand(i).isKill() && + ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { + + // The store of this spilled value is potentially dead, but we + // won't know for certain until we've confirmed that the re-use + // above is valid, which means waiting until the other operands + // are processed. For now we just track the spill slot, we'll + // remove it after the other operands are processed if valid. + + PotentialDeadStoreSlots.push_back(ReuseSlot); + } + + // Mark is isKill if it's there no other uses of the same virtual + // register and it's not a two-address operand. IsKill will be + // unset if reg is reused. + if (!isTied && KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + continue; + } // CanReuse + + // Otherwise we have a situation where we have a two-address instruction + // whose mod/ref operand needs to be reloaded. This reload is already + // available in some register "PhysReg", but if we used PhysReg as the + // operand to our 2-addr instruction, the instruction would modify + // PhysReg. This isn't cool if something later uses PhysReg and expects + // to get its initial value. + // + // To avoid this problem, and to avoid doing a load right after a store, + // we emit a copy from PhysReg into the designated register for this + // operand. + // + // This case also applies to an earlyclobber'd PhysReg. + unsigned DesignatedReg = VRM->getPhys(VirtReg); + assert(DesignatedReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + DesignatedReg = ReusedOperands. + GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, + MaybeDeadStores, RegKills, KillOps, *VRM); + + // If the mapped designated register is actually the physreg we have + // incoming, we don't need to inserted a dead copy. + if (DesignatedReg == PhysReg) { + // If this stack slot value is already available, reuse it! + if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) + DEBUG(dbgs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); + else + DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); + DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + ReusedOperands.markClobbered(RReg); + ++NumReused; + continue; + } + + MRI->setPhysRegUsed(DesignatedReg); + ReusedOperands.markClobbered(DesignatedReg); + + // Back-schedule reloads and remats. 
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, *MBB->getParent()); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); + CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); + UpdateKills(*CopyMI, TRI, RegKills, KillOps); + + // This invalidates DesignatedReg. + Spills.ClobberPhysReg(DesignatedReg); + + Spills.addAvailable(ReuseSlot, DesignatedReg); + unsigned RReg = + SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + ++NumReused; + continue; + } // if (PhysReg) + + // Otherwise, reload it and remember that we have it. + PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg && "Must map virtreg to physreg!"); + + // Note that, if we reused a register for a previous operand, the + // register we want to reload into might not actually be + // available. If this occurs, use the register indicated by the + // reuser. + if (ReusedOperands.hasReuses()) + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, *VRM); + + MRI->setPhysRegUsed(PhysReg); + ReusedOperands.markClobbered(PhysReg); + if (AvoidReload) + ++NumAvoided; + else { + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, *MBB->getParent()); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); + } else { + const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); + TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); + MachineInstr *LoadMI = prior(InsertLoc); + VRM->addSpillSlotUse(SSorRMId, LoadMI); + ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); + } + // This invalidates PhysReg. + Spills.ClobberPhysReg(PhysReg); + + // Any stores to this stack slot are not dead anymore. + if (!DoReMat) + MaybeDeadStores[SSorRMId] = NULL; + Spills.addAvailable(SSorRMId, PhysReg); + // Assumes this is the last use. IsKill will be unset if reg is reused + // unless it's a two-address operand. + if (!MI.isRegTiedToDefOperand(i) && + KilledMIRegs.count(VirtReg) == 0) { + MI.getOperand(i).setIsKill(); + KilledMIRegs.insert(VirtReg); + } + + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(dbgs() << '\t' << *prior(InsertLoc)); + } + unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; + MI.getOperand(i).setReg(RReg); + MI.getOperand(i).setSubReg(0); + } + + // Ok - now we can remove stores that have been confirmed dead. + for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { + // This was the last use and the spilled value is still available + // for reuse. That means the spill was unnecessary! + int PDSSlot = PotentialDeadStoreSlots[j]; + MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; + if (DeadStore) { + DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); + InvalidateKills(*DeadStore, TRI, RegKills, KillOps); + EraseInstr(DeadStore); + MaybeDeadStores[PDSSlot] = NULL; + ++NumDSE; + } + } +} + /// rewriteMBB - Keep track of which spills are available even after the /// register allocator is done with them. If possible, avoid reloading vregs. 
void @@ -1880,9 +2255,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // ReMatDefs - These are rematerializable def MIs which are not deleted. SmallSet<MachineInstr*, 4> ReMatDefs; - // Clear kill info. - SmallSet<unsigned, 2> KilledMIRegs; - // Keep track of the registers we have already spilled in case there are // multiple defs of the same register in MI. SmallSet<unsigned, 8> SpilledMIRegs; @@ -1918,323 +2290,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. ReuseInfo ReusedOperands(MI, TRI); - SmallVector<unsigned, 4> VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector<int, 2> PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024<def> = IMPLICIT_DEF - // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. 
For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. - if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg << ": " << MOk << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. - ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. 
- ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. - GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, MF); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? 
TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(MII)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. - PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. - for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DeadStore); - MBB->erase(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } + ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); DEBUG(dbgs() << '\t' << MI); @@ -2288,14 +2345,13 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, BackTracked = true; } else { DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // Unset last kill since it's being reused. - InvalidateKill(InReg, TRI, RegKills, KillOps); + // InvalidateKills resurrects any prior kill of the copy's source + // allowing the source reg to be reused in place of the copy. 
Spills.disallowClobberPhysReg(InReg); } InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; goto ProcessNextInst; } @@ -2306,8 +2362,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ MBB->insert(MII, NewMIs[0]); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; --NextMII; // backtrack to the unfolded instruction. BackTracked = true; @@ -2343,8 +2398,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MBB->insert(MII, NewStore); VRM->addSpillSlotUse(SS, NewStore); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; --NextMII; --NextMII; // backtrack to the unfolded instruction. @@ -2359,8 +2413,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // If we get here, the store is dead, nuke it now. DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(DeadStore); - MBB->erase(DeadStore); + EraseInstr(DeadStore); if (!NewStore) ++NumDSE; } @@ -2437,8 +2490,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Last def is now dead. TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); } - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; Spills.disallowClobberPhysReg(VirtReg); goto ProcessNextInst; @@ -2514,8 +2566,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; UpdateKills(*LastStore, TRI, RegKills, KillOps); goto ProcessNextInst; @@ -2526,8 +2577,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Delete dead instructions without side effects. if (!Erased && !BackTracked && isSafeToDelete(MI)) { InvalidateKills(MI, TRI, RegKills, KillOps); - VRM->RemoveMachineInstrFromMaps(&MI); - MBB->erase(&MI); + EraseInstr(&MI); Erased = true; } if (!Erased) |
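
The repeated VRM->RemoveMachineInstrFromMaps / MBB->erase pairs throughout these hunks are folded into the new EraseInstr helper, which additionally drops the instruction from LiveIntervals so its SlotIndex mapping does not go stale now that the rewriter keeps LIs as a member. A minimal free-function sketch of that call sequence (the name eraseAndUnmap is invented here for illustration; the member calls are the ones the patch itself uses):

static void eraseAndUnmap(MachineInstr *MI, VirtRegMap &VRM,
                          LiveIntervals &LIS) {
  // Both analyses key their bookkeeping off the MachineInstr, so they must
  // be updated before the instruction is deleted.
  VRM.RemoveMachineInstrFromMaps(MI);   // drop MI from VirtRegMap's per-instruction maps
  LIS.RemoveMachineInstrFromMaps(MI);   // release the instruction's SlotIndex
  MI->eraseFromParent();                // finally unlink and delete the instruction
}

The ordering matters: eraseFromParent destroys the instruction, so any map lookups keyed by it have to happen first.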
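
ProcessUses only reuses an already-available physreg when several conditions hold, and those checks are spread across the loop above, so a condensed sketch may help. This is not a helper that exists in the file: the function name and signature are invented for illustration, and it assumes the file-local ReuseInfo and AvailableSpills classes, but each individual query mirrors one made in the loop.

static bool canReuseAvailablePhysReg(MachineInstr &MI, unsigned OpIdx,
                                     unsigned VirtReg, unsigned PhysReg,
                                     unsigned SubIdx,
                                     MachineRegisterInfo *MRI,
                                     const TargetRegisterInfo *TRI,
                                     ReuseInfo &ReusedOperands,
                                     AvailableSpills &Spills) {
  // A sub-register use may only reuse PhysReg if PhysReg is in the virtual
  // register's class; e.g. not every x86 32-bit register has an accessible
  // sub-register.
  if (SubIdx && !MRI->getRegClass(VirtReg)->contains(PhysReg))
    return false;

  // A tied (two-address) operand may only reuse PhysReg if the value may be
  // clobbered and no earlier def in this instruction has clobbered it already.
  if (MI.isRegTiedToDefOperand(OpIdx) &&
      (ReusedOperands.isClobbered(PhysReg) ||
       !Spills.canClobberPhysReg(PhysReg)))
    return false;

  // For inline asm, PhysReg (or anything overlapping it) must not also be an
  // earlyclobber operand of the same asm statement.
  if (MI.isInlineAsm())
    for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
      const MachineOperand &MOk = MI.getOperand(k);
      if (MOk.isReg() && MOk.isEarlyClobber() &&
          TRI->regsOverlap(MOk.getReg(), PhysReg))
        return false;
    }
  return true;
}

In the real loop a failed check does not abandon the operand entirely: a sub-register mismatch simply clears PhysReg so a normal reload is emitted, and a tied operand that cannot reuse PhysReg falls back to copying the available value into the designated register instead.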