Diffstat (limited to 'lib/CodeGen')
191 files changed, 17212 insertions, 9353 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 44345addbcb6..69c368524a74 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
@@ -111,18 +110,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
   return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u));
 }
 
-
-
-AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi,
-                         const RegisterClassInfo &RCI,
-                         TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
-  AntiDepBreaker(), MF(MFi),
-  MRI(MF.getRegInfo()),
-  TII(MF.getTarget().getInstrInfo()),
-  TRI(MF.getTarget().getRegisterInfo()),
-  RegClassInfo(RCI),
-  State(nullptr) {
+AggressiveAntiDepBreaker::AggressiveAntiDepBreaker(
+    MachineFunction &MFi, const RegisterClassInfo &RCI,
+    TargetSubtargetInfo::RegClassVector &CriticalPathRCs)
+    : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()),
+      TII(MF.getSubtarget().getInstrInfo()),
+      TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI),
+      State(nullptr) {
   /* Collect a bitset of all registers that are only broken if they
      are on the critical path. */
   for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -262,11 +256,8 @@ static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
   for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
        P != PE; ++P) {
     if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
-      unsigned Reg = P->getReg();
-      if (RegSet.count(Reg) == 0) {
+      if (RegSet.insert(P->getReg()).second)
         Edges.push_back(&*P);
-        RegSet.insert(Reg);
-      }
     }
   }
 }
@@ -527,7 +518,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
       BV &= RCBV;
     }
 
-    DEBUG(dbgs() << " " << RC->getName());
+    DEBUG(dbgs() << " " << TRI->getRegClassName(RC));
   }
 
   return BV;
 }
@@ -620,8 +611,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
 
   DEBUG(dbgs() << "\tFind Registers:");
 
-  if (RenameOrder.count(SuperRC) == 0)
-    RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
+  RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
 
   unsigned OrigR = RenameOrder[SuperRC];
   unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
@@ -683,6 +673,21 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
         goto next_super_reg;
       }
 
+      // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also
+      // defines 'NewReg' via an early-clobber operand.
+      auto Range = RegRefs.equal_range(Reg);
+      for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+        auto UseMI = Q->second.Operand->getParent();
+        int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI);
+        if (Idx == -1)
+          continue;
+
+        if (UseMI->getOperand(Idx).isEarlyClobber()) {
+          DEBUG(dbgs() << "(ec)");
+          goto next_super_reg;
+        }
+      }
+
       // Record that 'Reg' can be renamed to 'NewReg'.
       RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
     }
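Aside: two hunks above replace a count()-then-insert() sequence with a single insert(), branching on the .second of the returned pair. A minimal stand-alone sketch of the idiom (toy function, not LLVM code):

    #include <set>
    #include <vector>

    // Keep only the first occurrence of each value. std::set::insert returns a
    // pair<iterator, bool>; .second is true only when the element was actually
    // inserted, i.e. this is the first time the value is seen.
    std::vector<int> keepFirstOccurrences(const std::vector<int> &In) {
      std::set<int> Seen;
      std::vector<int> Out;
      for (int V : In)
        if (Seen.insert(V).second)
          Out.push_back(V);
      return Out;
    }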
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 2ab9d89574e4..12cf95b9b4f9 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -14,8 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
-#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
 
 #include "AntiDepBreaker.h"
 #include "llvm/ADT/BitVector.h"
@@ -32,88 +32,83 @@ namespace llvm {
 class RegisterClassInfo;
 
-  /// Class AggressiveAntiDepState
   /// Contains all the state necessary for anti-dep breaking.
   class AggressiveAntiDepState {
   public:
-    /// RegisterReference - Information about a register reference
-    /// within a liverange
+    /// Information about a register reference within a liverange
     typedef struct {
-      /// Operand - The registers operand
+      /// The registers operand
       MachineOperand *Operand;
-      /// RC - The register class
+      /// The register class
       const TargetRegisterClass *RC;
     } RegisterReference;
 
   private:
-    /// NumTargetRegs - Number of non-virtual target registers
-    /// (i.e. TRI->getNumRegs()).
+    /// Number of non-virtual target registers (i.e. TRI->getNumRegs()).
     const unsigned NumTargetRegs;
 
-    /// GroupNodes - Implements a disjoint-union data structure to
+    /// Implements a disjoint-union data structure to
     /// form register groups. A node is represented by an index into
     /// the vector. A node can "point to" itself to indicate that it
    /// is the parent of a group, or point to another node to indicate
     /// that it is a member of the same group as that node.
     std::vector<unsigned> GroupNodes;
 
-    /// GroupNodeIndices - For each register, the index of the GroupNode
+    /// For each register, the index of the GroupNode
     /// currently representing the group that the register belongs to.
     /// Register 0 is always represented by the 0 group, a group
     /// composed of registers that are not eligible for anti-aliasing.
     std::vector<unsigned> GroupNodeIndices;
 
-    /// RegRefs - Map registers to all their references within a live range.
+    /// Map registers to all their references within a live range.
    std::multimap<unsigned, RegisterReference> RegRefs;
 
-    /// KillIndices - The index of the most recent kill (proceding bottom-up),
+    /// The index of the most recent kill (proceding bottom-up),
     /// or ~0u if the register is not live.
     std::vector<unsigned> KillIndices;
 
-    /// DefIndices - The index of the most recent complete def (proceding bottom
+    /// The index of the most recent complete def (proceding bottom
     /// up), or ~0u if the register is live.
     std::vector<unsigned> DefIndices;
 
   public:
     AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
 
-    /// GetKillIndices - Return the kill indices.
+    /// Return the kill indices.
     std::vector<unsigned> &GetKillIndices() { return KillIndices; }
 
-    /// GetDefIndices - Return the define indices.
+    /// Return the define indices.
     std::vector<unsigned> &GetDefIndices() { return DefIndices; }
 
-    /// GetRegRefs - Return the RegRefs map.
+    /// Return the RegRefs map.
     std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
 
-    // GetGroup - Get the group for a register. The returned value is
+    // Get the group for a register. The returned value is
     // the index of the GroupNode representing the group.
     unsigned GetGroup(unsigned Reg);
 
-    // GetGroupRegs - Return a vector of the registers belonging to a
-    // group. If RegRefs is non-NULL then only included referenced registers.
+    // Return a vector of the registers belonging to a group.
+    // If RegRefs is non-NULL then only included referenced registers.
     void GetGroupRegs(
        unsigned Group,
        std::vector<unsigned> &Regs,
       std::multimap<unsigned,
          AggressiveAntiDepState::RegisterReference> *RegRefs);
 
-    // UnionGroups - Union Reg1's and Reg2's groups to form a new
-    // group. Return the index of the GroupNode representing the
-    // group.
+    // Union Reg1's and Reg2's groups to form a new group.
+    // Return the index of the GroupNode representing the group.
     unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
 
-    // LeaveGroup - Remove a register from its current group and place
+    // Remove a register from its current group and place
     // it alone in its own group. Return the index of the GroupNode
     // representing the registers new group.
     unsigned LeaveGroup(unsigned Reg);
 
-    /// IsLive - Return true if Reg is live
+    /// Return true if Reg is live.
     bool IsLive(unsigned Reg);
   };
 
-  /// Class AggressiveAntiDepBreaker
   class AggressiveAntiDepBreaker : public AntiDepBreaker {
     MachineFunction& MF;
     MachineRegisterInfo &MRI;
@@ -121,12 +116,11 @@ class RegisterClassInfo;
     const TargetRegisterInfo *TRI;
     const RegisterClassInfo &RegClassInfo;
 
-    /// CriticalPathSet - The set of registers that should only be
+    /// The set of registers that should only be
     /// renamed if they are on the critical path.
     BitVector CriticalPathSet;
 
-    /// State - The state used to identify and rename anti-dependence
-    /// registers.
+    /// The state used to identify and rename anti-dependence registers.
     AggressiveAntiDepState *State;
 
   public:
@@ -135,11 +129,10 @@ class RegisterClassInfo;
                               TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
     ~AggressiveAntiDepBreaker();
 
-    /// Start - Initialize anti-dep breaking for a new basic block.
+    /// Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB) override;
 
-    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
-    /// path
+    /// Identifiy anti-dependencies along the critical path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
@@ -148,24 +141,24 @@ class RegisterClassInfo;
                                    unsigned InsertPosIndex,
                                    DbgValueVector &DbgValues) override;
 
-    /// Observe - Update liveness information to account for the current
+    /// Update liveness information to account for the current
     /// instruction, which will not be scheduled.
     ///
     void Observe(MachineInstr *MI, unsigned Count,
                  unsigned InsertPosIndex) override;
 
-    /// Finish - Finish anti-dep breaking for a basic block.
+    /// Finish anti-dep breaking for a basic block.
     void FinishBlock() override;
 
   private:
     /// Keep track of a position in the allocation order for each regclass.
     typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
 
-    /// IsImplicitDefUse - Return true if MO represents a register
+    /// Return true if MO represents a register
     /// that is both implicitly used and defined in MI
     bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
 
-    /// GetPassthruRegs - If MI implicitly def/uses a register, then
+    /// If MI implicitly def/uses a register, then
     /// return that register and all subregisters.
     void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
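Aside: the GroupNodes/GroupNodeIndices comments in the header above describe a disjoint-union (union-find) structure over registers. A minimal sketch of that scheme as the comments describe it (toy code with hypothetical names, not the class's actual implementation):

    #include <numeric>
    #include <vector>

    struct RegGroups {
      // Parent[i] == i marks a group leader; otherwise Parent[i] points at
      // another member of the same group.
      std::vector<unsigned> Parent;

      explicit RegGroups(unsigned NumRegs) : Parent(NumRegs) {
        std::iota(Parent.begin(), Parent.end(), 0u); // each reg in its own group
      }

      // Find the group leader, compressing the path as we go.
      unsigned GetGroup(unsigned Reg) {
        while (Parent[Reg] != Reg) {
          Parent[Reg] = Parent[Parent[Reg]];
          Reg = Parent[Reg];
        }
        return Reg;
      }

      // Merge the two groups and return the index of the combined leader.
      unsigned UnionGroups(unsigned R1, unsigned R2) {
        unsigned G1 = GetGroup(R1), G2 = GetGroup(R2);
        Parent[G2] = G1;
        return G1;
      }
    };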
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 64ff2a7ce836..1e4eaa76ee7f 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -14,8 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
-#define LLVM_CODEGEN_ALLOCATIONORDER_H
+#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 0eabee30e964..2e8af9eb0048 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -25,12 +25,14 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/GlobalStatus.h"
+
 using namespace llvm;
 
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
-/// of insertvalue or extractvalue indices that identify a member, return
-/// the linearized index of the start of the member.
-///
+/// Compute the linearized index of a member in a nested aggregate/struct/array
+/// by recursing and accumulating CurIndex as long as there are indices in the
+/// index list.
 unsigned llvm::ComputeLinearIndex(Type *Ty,
                                   const unsigned *Indices,
                                   const unsigned *IndicesEnd,
@@ -49,16 +51,23 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
         return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
       CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
     }
+    assert(!Indices && "Unexpected out of bound");
     return CurIndex;
   }
   // Given an array type, recursively traverse the elements.
   else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
     Type *EltTy = ATy->getElementType();
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
-      if (Indices && *Indices == i)
-        return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
-      CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex);
+    unsigned NumElts = ATy->getNumElements();
+    // Compute the Linear offset when jumping one element of the array
+    unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0);
+    if (Indices) {
+      assert(*Indices < NumElts && "Unexpected out of bound");
+      // If the indice is inside the array, compute the index to the requested
+      // elt and recurse inside the element with the end of the indices list
+      CurIndex += EltLinearOffset* *Indices;
+      return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
     }
+    CurIndex += EltLinearOffset*NumElts;
     return CurIndex;
   }
   // We haven't found the type we're looking for, so keep searching.
@@ -106,15 +115,16 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty,
 }
 
 /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
-GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+GlobalValue *llvm::ExtractTypeInfo(Value *V) {
   V = V->stripPointerCasts();
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+  GlobalValue *GV = dyn_cast<GlobalValue>(V);
+  GlobalVariable *Var = dyn_cast<GlobalVariable>(V);
 
-  if (GV && GV->getName() == "llvm.eh.catch.all.value") {
-    assert(GV->hasInitializer() &&
+  if (Var && Var->getName() == "llvm.eh.catch.all.value") {
+    assert(Var->hasInitializer() &&
            "The EH catch-all value must have an initializer");
-    Value *Init = GV->getInitializer();
-    GV = dyn_cast<GlobalVariable>(Init);
+    Value *Init = Var->getInitializer();
+    GV = dyn_cast<GlobalValue>(Init);
     if (!GV) V = cast<ConstantPointerNull>(Init);
   }
@@ -508,8 +518,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
       return false;
   }
 
-  return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
-                                         *TM.getTargetLowering());
+  return returnTypeIsEligibleForTailCall(
+      ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering());
 }
 
 bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
@@ -606,3 +616,29 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
 
   return true;
 }
+
+bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+  if (!GV->hasLinkOnceODRLinkage())
+    return false;
+
+  if (GV->hasUnnamedAddr())
+    return true;
+
+  // If it is a non constant variable, it needs to be uniqued across shared
+  // objects.
+  if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
+    if (!Var->isConstant())
+      return false;
+  }
+
+  // An alias can point to a variable. We could try to resolve the alias to
+  // decide, but for now just don't hide them.
+  if (isa<GlobalAlias>(GV))
+    return false;
+
+  GlobalStatus GS;
+  if (GlobalStatus::analyzeGlobal(GV, GS))
+    return false;
+
+  return !GS.IsCompared;
+}
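Aside: the ComputeLinearIndex hunk above stops walking every array element and instead computes the linear width of one element once, then scales it by the requested index. A simplified stand-alone model of the computation (toy type representation, not LLVM's Type API; for an array all member widths are equal, so the skip loop below collapses to the diff's EltLinearOffset * *Indices multiplication):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Ty {
      std::vector<Ty> Members; // empty => scalar leaf, which has linear width 1

      size_t linearWidth() const {
        if (Members.empty())
          return 1;
        size_t W = 0;
        for (const Ty &M : Members)
          W += M.linearWidth();
        return W;
      }
    };

    // Linear index of the leaf selected by an extractvalue-style index path.
    size_t linearIndex(const Ty &T, const std::vector<unsigned> &Path,
                       size_t Pos = 0, size_t Cur = 0) {
      if (Pos == Path.size())
        return Cur;
      unsigned Idx = Path[Pos];
      assert(Idx < T.Members.size() && "index out of bounds");
      for (unsigned i = 0; i < Idx; ++i)   // skip earlier members by width,
        Cur += T.Members[i].linearWidth(); // not by recursing into each one
      return linearIndex(T.Members[Idx], Path, Pos + 1, Cur);
    }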
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index df47f984d578..a61a8efa4da0 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
-#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +25,8 @@
 
 namespace llvm {
 
-/// AntiDepBreaker - This class works into conjunction with the
-/// post-RA scheduler to rename registers to break register
-/// anti-dependencies.
+/// This class works in conjunction with the post-RA scheduler to rename
+/// registers to break register anti-dependencies (WAR hazards).
 class AntiDepBreaker {
 public:
   typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
@@ -35,29 +34,26 @@ public:
 
   virtual ~AntiDepBreaker();
 
-  /// Start - Initialize anti-dep breaking for a new basic block.
+  /// Initialize anti-dep breaking for a new basic block.
   virtual void StartBlock(MachineBasicBlock *BB) =0;
 
-  /// BreakAntiDependencies - Identifiy anti-dependencies within a
-  /// basic-block region and break them by renaming registers. Return
-  /// the number of anti-dependencies broken.
-  ///
+  /// Identifiy anti-dependencies within a basic-block region and break them by
+  /// renaming registers. Return the number of anti-dependencies broken.
   virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
                                          MachineBasicBlock::iterator Begin,
                                          MachineBasicBlock::iterator End,
                                          unsigned InsertPosIndex,
                                          DbgValueVector &DbgValues) = 0;
 
-  /// Observe - Update liveness information to account for the current
+  /// Update liveness information to account for the current
   /// instruction, which will not be scheduled.
-  ///
   virtual void Observe(MachineInstr *MI, unsigned Count,
                        unsigned InsertPosIndex) =0;
 
-  /// Finish - Finish anti-dep breaking for a basic block.
+  /// Finish anti-dep breaking for a basic block.
   virtual void FinishBlock() =0;
 
-  /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating
+  /// Update DBG_VALUE if dependency breaker is updating
   /// other machine instruction to use NewReg.
   void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
     assert (MI->isDebugValue() && "MI is not DBG_VALUE!");
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 251f5effd6b4..66c6c6335407 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -108,7 +108,7 @@ void ARMException::endFunction(const MachineFunction *) {
 }
 
 void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
-  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
 
   bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
@@ -121,9 +121,9 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
     Entry = TypeInfos.size();
   }
 
-  for (std::vector<const GlobalVariable *>::const_reverse_iterator
+  for (std::vector<const GlobalValue *>::const_reverse_iterator
          I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
-    const GlobalVariable *GV = *I;
+    const GlobalValue *GV = *I;
     if (VerboseAsm)
       Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
     Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 42757d732fd5..802e050cdf6e 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
-#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H
 
 #include "llvm/ADT/DenseMap.h"
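Aside: for readers new to the term, here is the anti-dependence (WAR, write-after-read) hazard that AntiDepBreaker renames registers to remove, in a toy sketch (illustrative code, not LLVM's MachineInstr API):

    #include <string>
    #include <vector>

    struct Instr {
      std::vector<std::string> Uses; // registers read
      std::vector<std::string> Defs; // registers written
    };

    // True if Later writes a register that Earlier reads. Such a pair, e.g.
    //   add r2, r0, r1   ; reads r0
    //   mov r0, #7       ; writes r0
    // may not be reordered; renaming the second def (mov r3, #7) removes the
    // constraint, which is exactly the transformation an anti-dep breaker does.
    bool hasAntiDependence(const Instr &Earlier, const Instr &Later) {
      for (const std::string &D : Later.Defs)
        for (const std::string &U : Earlier.Uses)
          if (D == U)
            return true;
      return false;
    }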
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 424e759caa8c..bbed8081a148 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -14,11 +14,13 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "DwarfDebug.h"
 #include "DwarfException.h"
+#include "Win64Exception.h"
 #include "WinCodeViewLineTables.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -49,7 +51,6 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Utils/GlobalStatus.h"
 using namespace llvm;
 
 #define DEBUG_TYPE "asm-printer"
@@ -98,11 +99,10 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
 }
 
 AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
-  : MachineFunctionPass(ID),
-    TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
-    OutContext(Streamer.getContext()),
-    OutStreamer(Streamer),
-    LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+    : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
+      MII(tm.getSubtargetImpl()->getInstrInfo()),
+      OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr),
+      LastFn(0), Counter(~0U), SetCounter(0) {
   DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
   CurrentFnSym = CurrentFnSymForSize = nullptr;
   GCMetadataPrinters = nullptr;
@@ -129,12 +129,12 @@ unsigned AsmPrinter::getFunctionNumber() const {
 }
 
 const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
-  return TM.getTargetLowering()->getObjFileLowering();
+  return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering();
 }
 
 /// getDataLayout - Return information about data layout.
 const DataLayout &AsmPrinter::getDataLayout() const {
-  return *TM.getDataLayout();
+  return *TM.getSubtargetImpl()->getDataLayout();
 }
 
 const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
@@ -173,9 +173,9 @@ bool AsmPrinter::doInitialization(Module &M) {
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
 
-  OutStreamer.InitSections();
+  OutStreamer.InitSections(false);
 
-  Mang = new Mangler(TM.getDataLayout());
+  Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout());
 
   // Emit the version-min deplyment target directive if needed.
   //
@@ -210,7 +210,7 @@ bool AsmPrinter::doInitialization(Module &M) {
   assert(MI && "AsmPrinter didn't require GCModuleInfo?");
   for (auto &I : *MI)
     if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
-      MP->beginAssembly(*this);
+      MP->beginAssembly(M, *MI, *this);
 
   // Emit module-level inline asm if it exists.
   if (!M.getModuleInlineAsm().empty()) {
@@ -222,11 +222,22 @@ bool AsmPrinter::doInitialization(Module &M) {
   }
 
   if (MAI->doesSupportDebugInformation()) {
+    bool skip_dwarf = false;
     if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) {
       Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this),
                                      DbgTimerName,
                                      CodeViewLineTablesGroupName));
-    } else {
+      // FIXME: Don't emit DWARF debug info if there's at least one function
+      // with AddressSanitizer instrumentation.
+      // This is a band-aid fix for PR22032.
+      for (auto &F : M.functions()) {
+        if (F.hasFnAttribute(Attribute::SanitizeAddress)) {
+          skip_dwarf = true;
+          break;
+        }
+      }
+    }
+    if (!skip_dwarf) {
       DD = new DwarfDebug(this, &M);
       Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
     }
@@ -243,8 +254,14 @@ bool AsmPrinter::doInitialization(Module &M) {
   case ExceptionHandling::ARM:
     ES = new ARMException(this);
     break;
-  case ExceptionHandling::WinEH:
-    ES = new Win64Exception(this);
+  case ExceptionHandling::ItaniumWinEH:
+  case ExceptionHandling::MSVC:
+    switch (MAI->getWinEHEncodingType()) {
+    default: llvm_unreachable("unsupported unwinding information encoding");
+    case WinEH::EncodingType::Itanium:
+      ES = new Win64Exception(this);
+      break;
+    }
     break;
   }
   if (ES)
@@ -253,33 +270,10 @@ bool AsmPrinter::doInitialization(Module &M) {
 }
 
 static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) {
-  GlobalValue::LinkageTypes Linkage = GV->getLinkage();
-  if (Linkage != GlobalValue::LinkOnceODRLinkage)
-    return false;
-
   if (!MAI.hasWeakDefCanBeHiddenDirective())
     return false;
 
-  if (GV->hasUnnamedAddr())
-    return true;
-
-  // This is only used for MachO, so right now it doesn't really matter how
-  // we handle alias. Revisit this once the MachO linker implements aliases.
-  if (isa<GlobalAlias>(GV))
-    return false;
-
-  // If it is a non constant variable, it needs to be uniqued across shared
-  // objects.
-  if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
-    if (!Var->isConstant())
-      return false;
-  }
-
-  GlobalStatus GS;
-  if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared)
-    return true;
-
-  return false;
+  return canBeOmittedFromSymbolTable(GV);
 }
 
 void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
@@ -356,12 +350,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   if (!GV->hasInitializer())   // External globals require no extra code.
     return;
 
+  GVSym->redefineIfPossible();
+  if (GVSym->isDefined() || GVSym->isVariable())
+    report_fatal_error("symbol '" + Twine(GVSym->getName()) +
+                       "' is already defined");
+
   if (MAI->hasDotTypeDotSizeDirective())
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
 
   SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
 
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType());
 
   // If the alignment is specified, we *must* obey it.  Overaligning a global
@@ -528,6 +527,10 @@ void AsmPrinter::EmitFunctionHeader() {
     OutStreamer.GetCommentOS() << '\n';
   }
 
+  // Emit the prefix data.
+  if (F->hasPrefixData())
+    EmitGlobalConstant(F->getPrefixData());
+
   // Emit the CurrentFnSym.  This is a virtual function to allow targets to
   // do their wild and crazy things as required.
   EmitFunctionEntryLabel();
@@ -548,21 +551,26 @@ void AsmPrinter::EmitFunctionHeader() {
       HI.Handler->beginFunction(MF);
   }
 
-  // Emit the prefix data.
-  if (F->hasPrefixData())
-    EmitGlobalConstant(F->getPrefixData());
+  // Emit the prologue data.
+  if (F->hasPrologueData())
+    EmitGlobalConstant(F->getPrologueData());
 }
 
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
 /// function.  This can be overridden by targets as required to do custom stuff.
 void AsmPrinter::EmitFunctionEntryLabel() {
+  CurrentFnSym->redefineIfPossible();
+
   // The function label could have already been emitted if two symbols end up
   // conflicting due to asm renaming.  Detect this and emit an error.
-  if (CurrentFnSym->isUndefined())
-    return OutStreamer.EmitLabel(CurrentFnSym);
+  if (CurrentFnSym->isVariable())
+    report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+                       "' is a protected alias");
+  if (CurrentFnSym->isDefined())
+    report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+                       "' label emitted multiple times to assembly file");
 
-  report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
-                     "' label emitted multiple times to assembly file");
+  return OutStreamer.EmitLabel(CurrentFnSym);
 }
 
 /// emitComments - Pretty-print comments for instructions.
@@ -578,20 +586,24 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
   // We assume a single instruction only has a spill or reload, not
   // both.
   const MachineMemOperand *MMO;
-  if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+  if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI,
+                                                                       FI)) {
    if (FrameInfo->isSpillSlotObjectIndex(FI)) {
      MMO = *MI.memoperands_begin();
      CommentOS << MMO->getSize() << "-byte Reload\n";
    }
-  } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+  } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot(
+                 &MI, MMO, FI)) {
    if (FrameInfo->isSpillSlotObjectIndex(FI))
      CommentOS << MMO->getSize() << "-byte Folded Reload\n";
-  } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+  } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE(
+                 &MI, FI)) {
    if (FrameInfo->isSpillSlotObjectIndex(FI)) {
      MMO = *MI.memoperands_begin();
      CommentOS << MMO->getSize() << "-byte Spill\n";
    }
-  } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+  } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot(
                 &MI, MMO, FI)) {
    if (FrameInfo->isSpillSlotObjectIndex(FI))
      CommentOS << MMO->getSize() << "-byte Folded Spill\n";
  }
@@ -605,8 +617,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
 /// that is an implicit def.
 void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
   unsigned RegNo = MI->getOperand(0).getReg();
-  OutStreamer.AddComment(Twine("implicit-def: ") +
-                         TM.getRegisterInfo()->getName(RegNo));
+  OutStreamer.AddComment(
+      Twine("implicit-def: ") +
+      TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo));
   OutStreamer.AddBlankLine();
 }
 
@@ -616,7 +629,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
     const MachineOperand &Op = MI->getOperand(i);
     assert(Op.isReg() && "KILL instruction must have only register operands");
     Str += ' ';
-    Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+    Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg());
     Str += (Op.isDef() ? "<def>" : "<kill>");
   }
   AP.OutStreamer.AddComment(Str);
@@ -627,21 +640,27 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
 /// of DBG_VALUE, returning true if it was able to do so.  A false return
 /// means the target will need to handle MI in EmitInstruction.
 static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
-  // This code handles only the 3-operand target-independent form.
-  if (MI->getNumOperands() != 3)
+  // This code handles only the 4-operand target-independent form.
+  if (MI->getNumOperands() != 4)
     return false;
 
   SmallString<128> Str;
   raw_svector_ostream OS(Str);
   OS << "DEBUG_VALUE: ";
 
-  DIVariable V(MI->getOperand(2).getMetadata());
+  DIVariable V = MI->getDebugVariable();
   if (V.getContext().isSubprogram()) {
     StringRef Name = DISubprogram(V.getContext()).getDisplayName();
     if (!Name.empty())
       OS << Name << ":";
   }
-  OS << V.getName() << " <- ";
+  OS << V.getName();
+
+  DIExpression Expr = MI->getDebugExpression();
+  if (Expr.isVariablePiece())
+    OS << " [piece offset=" << Expr.getPieceOffset()
+       << " size=" << Expr.getPieceSize() << "]";
+  OS << " <- ";
 
   // The second operand is only an offset if it's an immediate.
   bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
@@ -672,7 +691,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
       Reg = MI->getOperand(0).getReg();
     } else {
       assert(MI->getOperand(0).isFI() && "Unknown operand type");
-      const TargetFrameLowering *TFI = AP.TM.getFrameLowering();
+      const TargetFrameLowering *TFI =
+          AP.TM.getSubtargetImpl()->getFrameLowering();
       Offset += TFI->getFrameIndexReference(*AP.MF,
                                             MI->getOperand(0).getIndex(), Reg);
       Deref = true;
@@ -686,7 +706,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
     }
     if (Deref)
       OS << '[';
-    OS << AP.TM.getRegisterInfo()->getName(Reg);
+    OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg);
   }
 
   if (Deref)
@@ -709,8 +729,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
 }
 
 bool AsmPrinter::needsSEHMoves() {
-  return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
-    MF->getFunction()->needsUnwindTableEntry();
+  return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry();
 }
 
 void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
@@ -722,9 +741,6 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
   if (needsCFIMoves() == CFI_M_None)
     return;
 
-  if (MMI->getCompactUnwindEncoding() != 0)
-    OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding());
-
   const MachineModuleInfo &MMI = MF->getMMI();
   const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
   unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
@@ -732,6 +748,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
   emitCFIInstruction(CFI);
 }
 
+void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
+  // The operands are the MCSymbol and the frame offset of the allocation.
+  MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol();
+  int FrameOffset = MI.getOperand(1).getImm();
+
+  // Emit a symbol assignment.
+  OutStreamer.EmitAssignment(FrameAllocSym,
+                             MCConstantExpr::Create(FrameOffset, OutContext));
+}
+
 /// EmitFunctionBody - This method emits the body and trailer for a
 /// function.
 void AsmPrinter::EmitFunctionBody() {
@@ -742,12 +768,10 @@ void AsmPrinter::EmitFunctionBody() {
 
   // Print out code for the function.
   bool HasAnyRealCode = false;
-  const MachineInstr *LastMI = nullptr;
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
     EmitBasicBlockStart(MBB);
     for (auto &MI : MBB) {
-      LastMI = &MI;
 
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
@@ -772,6 +796,10 @@ void AsmPrinter::EmitFunctionBody() {
         emitCFIInstruction(MI);
         break;
 
+      case TargetOpcode::FRAME_ALLOC:
+        emitFrameAlloc(MI);
+        break;
+
       case TargetOpcode::EH_LABEL:
       case TargetOpcode::GC_LABEL:
         OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol());
@@ -804,26 +832,22 @@ void AsmPrinter::EmitFunctionBody() {
         }
       }
     }
-  }
 
-  // If the last instruction was a prolog label, then we have a situation where
-  // we emitted a prolog but no function body. This results in the ending prolog
-  // label equaling the end of function label and an invalid "row" in the
-  // FDE. We need to emit a noop in this situation so that the FDE's rows are
-  // valid.
-  bool RequiresNoop = LastMI && LastMI->isCFIInstruction();
+    EmitBasicBlockEnd(MBB);
+  }
 
   // If the function is empty and the object file uses .subsections_via_symbols,
   // then we need to emit *something* to the function body to prevent the
   // labels from collapsing together.  Just emit a noop.
-  if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+  if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
     MCInst Noop;
-    TM.getInstrInfo()->getNoopForMachoTarget(Noop);
-    if (Noop.getOpcode()) {
-      OutStreamer.AddComment("avoids zero-length function");
+    TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop);
+    OutStreamer.AddComment("avoids zero-length function");
+
+    // Targets can opt-out of emitting the noop here by leaving the opcode
+    // unspecified.
+    if (Noop.getOpcode())
       OutStreamer.EmitInstruction(Noop, getSubtargetInfo());
-    } else  // Target not mc-ized yet.
-      OutStreamer.EmitRawText(StringRef("\tnop\n"));
   }
 
   const Function *F = MF->getFunction();
@@ -869,8 +893,6 @@ void AsmPrinter::EmitFunctionBody() {
   OutStreamer.AddBlankLine();
 }
 
-static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
-
 bool AsmPrinter::doFinalization(Module &M) {
   // Emit global variables.
   for (const auto &G : M.globals())
@@ -895,17 +917,18 @@ bool AsmPrinter::doFinalization(Module &M) {
     unsigned Arch = Triple(getTargetTriple()).getArch();
     bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
     MCInst TrapInst;
-    TM.getInstrInfo()->getTrap(TrapInst);
+    TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst);
+    unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
+
+    // Emit the right section for these functions.
+    OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
     for (const auto &KV : JITI->getTables()) {
       uint64_t Count = 0;
       for (const auto &FunPair : KV.second) {
         // Emit the function labels to make this be a function entry point.
         MCSymbol *FunSym =
           OutContext.GetOrCreateSymbol(FunPair.second->getName());
-        OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
-        // FIXME: JumpTableInstrInfo should store information about the required
-        // alignment of table entries and the size of the padding instruction.
-        EmitAlignment(3);
+        EmitAlignment(LogAlignment);
         if (IsThumb)
           OutStreamer.EmitThumbFunc(FunSym);
         if (MAI->hasDotTypeDotSizeDirective())
@@ -920,16 +943,16 @@ bool AsmPrinter::doFinalization(Module &M) {
         const MCSymbolRefExpr *TargetSymRef =
          MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
                                  OutContext);
-        TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+        TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch(
+            JumpToFun, TargetSymRef);
         OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
         ++Count;
       }
 
       // Emit enough padding instructions to fill up to the next power of two.
-      // This assumes that the trap instruction takes 8 bytes or fewer.
       uint64_t Remaining = NextPowerOf2(Count) - Count;
       for (uint64_t C = 0; C < Remaining; ++C) {
-        EmitAlignment(3);
+        EmitAlignment(LogAlignment);
         OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
       }
 
@@ -976,35 +999,49 @@ bool AsmPrinter::doFinalization(Module &M) {
     }
   }
 
-  if (MAI->hasSetDirective()) {
-    OutStreamer.AddBlankLine();
-    for (const auto &Alias : M.aliases()) {
-      MCSymbol *Name = getSymbol(&Alias);
+  OutStreamer.AddBlankLine();
+  for (const auto &Alias : M.aliases()) {
+    MCSymbol *Name = getSymbol(&Alias);
 
-      if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
-        OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
-      else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
-        OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
-      else
-        assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
+    if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
+      OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+    else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
+      OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+    else
+      assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
 
-      EmitVisibility(Name, Alias.getVisibility());
+    EmitVisibility(Name, Alias.getVisibility());
 
-      // Emit the directives as assignments aka .set:
-      OutStreamer.EmitAssignment(Name,
-                                 lowerConstant(Alias.getAliasee(), *this));
-    }
+    // Emit the directives as assignments aka .set:
+    OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee()));
   }
 
   GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
   assert(MI && "AsmPrinter didn't require GCModuleInfo?");
   for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
     if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I))
-      MP->finishAssembly(*this);
+      MP->finishAssembly(M, *MI, *this);
 
   // Emit llvm.ident metadata in an '.ident' directive.
   EmitModuleIdents(M);
 
+  // Emit __morestack address if needed for indirect calls.
+  if (MMI->usesMorestackAddr()) {
+    const MCSection *ReadOnlySection =
+        getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(),
+                                                   /*C=*/nullptr);
+    OutStreamer.SwitchSection(ReadOnlySection);
+
+    MCSymbol *AddrSymbol =
+        OutContext.GetOrCreateSymbol(StringRef("__morestack_addr"));
+    OutStreamer.EmitLabel(AddrSymbol);
+
+    const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
+    unsigned PtrSize = DL.getPointerSize(0);
+    OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__morestack"),
+                                PtrSize);
+  }
+
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable.  Some targets have a directive to declare this.
   Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
@@ -1062,7 +1099,8 @@ void AsmPrinter::EmitConstantPool() {
     const MachineConstantPoolEntry &CPE = CP[i];
     unsigned Align = CPE.getAlignment();
 
-    SectionKind Kind = CPE.getSectionKind(TM.getDataLayout());
+    SectionKind Kind =
+        CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout());
 
     const Constant *C = nullptr;
     if (!CPE.isMachineConstantPoolEntry())
@@ -1115,7 +1153,8 @@ void AsmPrinter::EmitConstantPool() {
         OutStreamer.EmitZeros(NewOffset - Offset);
 
       Type *Ty = CPE.getType();
-      Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty);
+      Offset = NewOffset +
+               TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty);
       OutStreamer.EmitLabel(Sym);
 
       if (CPE.isMachineConstantPoolEntry())
@@ -1130,7 +1169,7 @@ void AsmPrinter::EmitConstantPool() {
 /// by the current function to the current output stream.
 ///
 void AsmPrinter::EmitJumpTableInfo() {
-  const DataLayout *DL = MF->getTarget().getDataLayout();
+  const DataLayout *DL = MF->getSubtarget().getDataLayout();
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   if (!MJTI) return;
   if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
@@ -1161,7 +1200,8 @@ void AsmPrinter::EmitJumpTableInfo() {
     JTInDiffSection = true;
   }
 
-  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout())));
+  EmitAlignment(Log2_32(
+      MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout())));
 
   // Jump tables in code sections are marked with a data_region directive
   // where that's supported.
@@ -1174,17 +1214,17 @@ void AsmPrinter::EmitJumpTableInfo() {
     // If this jump table was deleted, ignore it.
     if (JTBBs.empty()) continue;
 
-    // For the EK_LabelDifference32 entry, if the target supports .set, emit a
-    // .set directive for each unique entry. This reduces the number of
-    // relocations the assembler will generate for the jump table.
+    // For the EK_LabelDifference32 entry, if using .set avoids a relocation,
+    /// emit a .set directive for each unique entry.
     if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
-        MAI->hasSetDirective()) {
+        MAI->doesSetDirectiveSuppressesReloc()) {
       SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
-      const TargetLowering *TLI = TM.getTargetLowering();
+      const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
      const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
        const MachineBasicBlock *MBB = JTBBs[ii];
-        if (!EmittedSets.insert(MBB)) continue;
+        if (!EmittedSets.insert(MBB).second)
+          continue;
 
        // .set LJTSet, LBB32-base
        const MCExpr *LHS =
@@ -1223,8 +1263,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   case MachineJumpTableInfo::EK_Inline:
     llvm_unreachable("Cannot emit EK_Inline jump table entry");
   case MachineJumpTableInfo::EK_Custom32:
-    Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
-                                                              OutContext);
+    Value =
+        TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry(
+            MJTI, MBB, UID, OutContext);
     break;
   case MachineJumpTableInfo::EK_BlockAddress:
     // EK_BlockAddress - Each entry is a plain address of block, e.g.:
@@ -1250,34 +1291,30 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   }
 
   case MachineJumpTableInfo::EK_LabelDifference32: {
-    // EK_LabelDifference32 - Each entry is the address of the block minus
-    // the address of the jump table.  This is used for PIC jump tables where
-    // gprel32 is not supported.  e.g.:
+    // Each entry is the address of the block minus the address of the jump
+    // table. This is used for PIC jump tables where gprel32 is not supported.
+    // e.g.:
     //      .word LBB123 - LJTI1_2
-    // If the .set directive is supported, this is emitted as:
+    // If the .set directive avoids relocations, this is emitted as:
     //      .set L4_5_set_123, LBB123 - LJTI1_2
     //      .word L4_5_set_123
-
-    // If we have emitted set directives for the jump table entries, print
-    // them rather than the entries themselves.  If we're emitting PIC, then
-    // emit the table entries as differences between two text section labels.
-    if (MAI->hasSetDirective()) {
-      // If we used .set, reference the .set's symbol.
+    if (MAI->doesSetDirectiveSuppressesReloc()) {
       Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
                                       OutContext);
       break;
     }
-    // Otherwise, use the difference as the jump table entry.
     Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
-    const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
-    Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+    const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering();
+    const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext);
+    Value = MCBinaryExpr::CreateSub(Value, Base, OutContext);
     break;
   }
   }
 
   assert(Value && "Unknown entry kind!");
 
-  unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout());
+  unsigned EntrySize =
+      MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout());
   OutStreamer.EmitValue(Value, EntrySize);
 }
@@ -1387,7 +1424,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
   }
 
   // Emit the function pointers in the target-specific order
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   unsigned Align = Log2_32(DL->getPointerPrefAlignment());
   std::stable_sort(Structors.begin(), Structors.end(),
                    [](const Structor &L,
@@ -1450,9 +1487,9 @@ void AsmPrinter::EmitInt32(int Value) const {
   OutStreamer.EmitIntValue(Value, 4);
 }
 
-/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
-/// in bytes of the directive is specified by Size and Hi/Lo specify the
-/// labels.  This implicitly uses .set if it is available.
+/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
+/// is specified by Size and Hi/Lo specify the labels.  This implicitly uses
+/// .set if it avoids relocations.
 void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
                                      unsigned Size) const {
   // Get the Hi-Lo expression.
@@ -1461,7 +1498,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
                             MCSymbolRefExpr::Create(Lo, OutContext),
                             OutContext);
 
-  if (!MAI->hasSetDirective()) {
+  if (!MAI->doesSetDirectiveSuppressesReloc()) {
     OutStreamer.EmitValue(Diff, Size);
     return;
   }
@@ -1472,36 +1509,6 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
   OutStreamer.EmitSymbolValue(SetLabel, Size);
 }
 
-/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
-/// where the size in bytes of the directive is specified by Size and Hi/Lo
-/// specify the labels.  This implicitly uses .set if it is available.
-void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
-                                           const MCSymbol *Lo,
-                                           unsigned Size) const {
-
-  // Emit Hi+Offset - Lo
-  // Get the Hi+Offset expression.
-  const MCExpr *Plus =
-    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
-                            MCConstantExpr::Create(Offset, OutContext),
-                            OutContext);
-
-  // Get the Hi+Offset-Lo expression.
-  const MCExpr *Diff =
-    MCBinaryExpr::CreateSub(Plus,
-                            MCSymbolRefExpr::Create(Lo, OutContext),
-                            OutContext);
-
-  if (!MAI->hasSetDirective())
-    OutStreamer.EmitValue(Diff, Size);
-  else {
-    // Otherwise, emit with .set (aka assignment).
-    MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
-    OutStreamer.EmitAssignment(SetLabel, Diff);
-    OutStreamer.EmitSymbolValue(SetLabel, Size);
-  }
-}
-
 /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label.  This implicitly uses .set if it is available.
@@ -1531,24 +1538,27 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
 // if required for correctness.
 //
 void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
-  if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
+  if (GV)
+    NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(),
+                                 NumBits);
 
   if (NumBits == 0) return;   // 1-byte aligned: no need to emit alignment.
 
+  assert(NumBits <
+             static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+         "undefined behavior");
   if (getCurrentSection()->getKind().isText())
-    OutStreamer.EmitCodeAlignment(1 << NumBits);
+    OutStreamer.EmitCodeAlignment(1u << NumBits);
   else
-    OutStreamer.EmitValueToAlignment(1 << NumBits);
+    OutStreamer.EmitValueToAlignment(1u << NumBits);
 }
 
 //===----------------------------------------------------------------------===//
 // Constant emission.
 //===----------------------------------------------------------------------===//
 
-/// lowerConstant - Lower the specified LLVM Constant to an MCExpr.
-///
-static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
-  MCContext &Ctx = AP.OutContext;
+const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
+  MCContext &Ctx = OutContext;
 
   if (CV->isNullValue() || isa<UndefValue>(CV))
     return MCConstantExpr::Create(0, Ctx);
@@ -1557,19 +1567,18 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
 
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
+    return MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
 
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
-    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+    return MCSymbolRefExpr::Create(GetBlockAddressSymbol(BA), Ctx);
 
   const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
   if (!CE) {
     llvm_unreachable("Unknown constant value to lower!");
   }
 
-  if (const MCExpr *RelocExpr =
-          AP.getObjFileLowering().getExecutableRelativeSymbol(CE, *AP.Mang,
-                                                              AP.TM))
+  if (const MCExpr *RelocExpr
+      = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM))
     return RelocExpr;
 
   switch (CE->getOpcode()) {
@@ -1577,10 +1586,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     // If the code isn't optimized, there may be outstanding folding
    // opportunities. Attempt to fold the expression using DataLayout as a
    // last resort before giving up.
-    if (Constant *C =
-          ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+    if (Constant *C = ConstantFoldConstantExpression(
+            CE, TM.getSubtargetImpl()->getDataLayout()))
      if (C != CE)
-        return lowerConstant(C, AP);
+        return lowerConstant(C);
 
    // Otherwise report the problem to the user.
    {
@@ -1588,16 +1597,16 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
      raw_string_ostream OS(S);
      OS << "Unsupported expression in static initializer: ";
      CE->printAsOperand(OS, /*PrintType=*/false,
-                     !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
+                     !MF ? nullptr : MF->getFunction()->getParent());
      report_fatal_error(OS.str());
    }
  case Instruction::GetElementPtr: {
-    const DataLayout &DL = *AP.TM.getDataLayout();
+    const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
    // Generate a symbolic expression for the byte address
    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
 
-    const MCExpr *Base = lowerConstant(CE->getOperand(0), AP);
+    const MCExpr *Base = lowerConstant(CE->getOperand(0));
    if (!OffsetAI)
      return Base;
 
@@ -1613,26 +1622,26 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
    // is reasonable to treat their delta as a 32-bit value.
    // FALL THROUGH.
  case Instruction::BitCast:
-    return lowerConstant(CE->getOperand(0), AP);
+    return lowerConstant(CE->getOperand(0));
 
  case Instruction::IntToPtr: {
-    const DataLayout &DL = *AP.TM.getDataLayout();
+    const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
    // Handle casts to pointers by changing them into casts to the appropriate
    // integer type.  This promotes constant folding and simplifies this code.
    Constant *Op = CE->getOperand(0);
    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
                                      false/*ZExt*/);
-    return lowerConstant(Op, AP);
+    return lowerConstant(Op);
  }
 
  case Instruction::PtrToInt: {
-    const DataLayout &DL = *AP.TM.getDataLayout();
+    const DataLayout &DL = *TM.getSubtargetImpl()->getDataLayout();
    // Support only foldable casts to/from pointers that can be eliminated by
    // changing the pointer to the appropriately sized integer type.
    Constant *Op = CE->getOperand(0);
    Type *Ty = CE->getType();
 
-    const MCExpr *OpExpr = lowerConstant(Op, AP);
+    const MCExpr *OpExpr = lowerConstant(Op);
 
    // We can emit the pointer value into this slot if the slot is an
    // integer slot equal to the size of the pointer.
@@ -1658,8 +1667,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor: {
-    const MCExpr *LHS = lowerConstant(CE->getOperand(0), AP);
-    const MCExpr *RHS = lowerConstant(CE->getOperand(1), AP);
+    const MCExpr *LHS = lowerConstant(CE->getOperand(0));
+    const MCExpr *RHS = lowerConstant(CE->getOperand(1));
     switch (CE->getOpcode()) {
     default: llvm_unreachable("Unknown binary operator constant cast expr");
     case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
@@ -1699,7 +1708,8 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     if (CI->getBitWidth() > 64) return -1;
 
-    uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType());
+    uint64_t Size =
+        TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType());
     uint64_t Value = CI->getZExtValue();
 
     // Make sure the constant is at least 8 bits long and has a power
@@ -1743,7 +1753,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
   // See if we can aggregate this into a .fill, if so, emit it as such.
   int Value = isRepeatedByteSequence(CDS, AP.TM);
   if (Value != -1) {
-    uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType());
+    uint64_t Bytes =
+        AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+            CDS->getType());
     // Don't emit a 1-byte object as a .fill.
     if (Bytes > 1)
       return AP.OutStreamer.EmitFill(Bytes, Value);
@@ -1793,7 +1805,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
     }
   }
 
-  const DataLayout &DL = *AP.TM.getDataLayout();
+  const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned Size = DL.getTypeAllocSize(CDS->getType());
   unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) *
                         CDS->getNumElements();
@@ -1808,7 +1820,9 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) {
   int Value = isRepeatedByteSequence(CA, AP.TM);
 
   if (Value != -1) {
-    uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType());
+    uint64_t Bytes =
+        AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+            CA->getType());
     AP.OutStreamer.EmitFill(Bytes, Value);
   }
   else {
@@ -1821,7 +1835,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
   for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
     emitGlobalConstantImpl(CV->getOperand(i), AP);
 
-  const DataLayout &DL = *AP.TM.getDataLayout();
+  const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned Size = DL.getTypeAllocSize(CV->getType());
   unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) *
                          CV->getType()->getNumElements();
@@ -1831,7 +1845,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) {
 
 static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) {
   // Print the fields in successive locations. Pad to align if needed!
-  const DataLayout *DL = AP.TM.getDataLayout();
+  const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned Size = DL->getTypeAllocSize(CS->getType());
   const StructLayout *Layout = DL->getStructLayout(CS->getType());
   uint64_t SizeSoFar = 0;
@@ -1881,7 +1895,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
 
   // PPC's long double has odd notions of endianness compared to how LLVM
   // handles it: p[0] goes first for *big* endian on PPC.
-  if (AP.TM.getDataLayout()->isBigEndian() &&
+  if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() &&
      !CFP->getType()->isPPC_FP128Ty()) {
    int Chunk = API.getNumWords() - 1;
@@ -1900,13 +1914,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
  }
 
  // Emit the tail padding for the long double.
-  const DataLayout &DL = *AP.TM.getDataLayout();
+  const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout();
  AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) -
                           DL.getTypeStoreSize(CFP->getType()));
 }
 
 static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
-  const DataLayout *DL = AP.TM.getDataLayout();
+  const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   unsigned BitWidth = CI->getBitWidth();
 
   // Copy the value as we may massage the layout for constants whose bit width
@@ -1952,7 +1966,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
 
     // Emit the extra bits after the 64-bits chunks.
 
     // Emit a directive that fills the expected size.
-    uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType());
+    uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(
+        CI->getType());
     Size -= (BitWidth / 64) * 8;
     assert(Size && Size * 8 >= ExtraBitsSize &&
            (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize)))
@@ -1962,7 +1977,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
 }
 
 static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
-  const DataLayout *DL = AP.TM.getDataLayout();
+  const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout();
   uint64_t Size = DL->getTypeAllocSize(CV->getType());
   if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
     return AP.OutStreamer.EmitZeros(Size);
@@ -2022,12 +2037,13 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) {
 
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size);
+  AP.OutStreamer.EmitValue(AP.lowerConstant(CV), Size);
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
 void AsmPrinter::EmitGlobalConstant(const Constant *CV) {
-  uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+  uint64_t Size =
+      TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType());
   if (Size)
     emitGlobalConstantImpl(CV, *this);
   else if (MAI->hasSubsectionsViaSymbols()) {
@@ -2056,7 +2072,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
 /// GetTempSymbol - Return the MCSymbol corresponding to the assembler
 /// temporary label with the specified stem and unique ID.
 MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
                                       Name + Twine(ID));
 }
@@ -2064,7 +2080,7 @@ MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const {
 /// GetTempSymbol - Return an assembler temporary label with the specified
 /// stem.
 MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
                                       Name);
 }
@@ -2080,7 +2096,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
 MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   return OutContext.GetOrCreateSymbol
     (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
      + "_" + Twine(CPID));
@@ -2094,7 +2110,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
 /// GetJTSetSymbol - Return the symbol for the specified jump table .set
 /// FIXME: privatize to AsmPrinter.
 MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
-  const DataLayout *DL = TM.getDataLayout();
+  const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout();
   return OutContext.GetOrCreateSymbol
   (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
    Twine(UID) + "_set_" + Twine(MBBID));
@@ -2292,6 +2308,11 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
   if (!S.usesMetadata())
     return nullptr;
 
+  assert(!S.useStatepoints() && "statepoints do not currently support custom"
+         " stackmap formats, please see the documentation for a description of"
+         " the default format.  If you really need a custom serialized format,"
+         " please file a bug");
+
   gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
   gcp_map_type::iterator GCPI = GCMap.find(&S);
   if (GCPI != GCMap.end())
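Aside: the EmitAlignment hunk above switches 1 << NumBits to 1u << NumBits and asserts the shift count is in range. A stand-alone sketch of why (hypothetical helper name): shifting by the operand's full bit width or more is undefined behavior, and the unsigned literal keeps the largest valid shifts out of signed-overflow territory.

    #include <cassert>
    #include <limits>

    unsigned alignmentInBytes(unsigned NumBitsLog2) {
      // Reject shift counts >= the bit width of unsigned (undefined behavior).
      assert(NumBitsLog2 <
                 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
             "shift count would be undefined behavior");
      return 1u << NumBitsLog2; // e.g. 3 -> 8-byte alignment
    }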
MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2080,7 +2096,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); @@ -2094,7 +2110,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); @@ -2292,6 +2308,11 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { if (!S.usesMetadata()) return nullptr; + assert(!S.useStatepoints() && "statepoints do not currently support custom" + " stackmap formats, please see the documentation for a description of" + " the default format. If you really need a custom serialized format," + " please file a bug"); + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); gcp_map_type::iterator GCPI = GCMap.find(&S); if (GCPI != GCMap.end()) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 02cd12be0453..00681f6ce8cd 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "ByteStreamer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/ADT/SmallBitVector.h" +#include "DwarfExpression.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" @@ -27,10 +27,30 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. 
+ return false; +} + //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -130,7 +150,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getDataLayout()->getPointerSize(); + return TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -186,60 +206,6 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, EmitLabelDifference(Label, SectionLabel, 4); } -/// Emit a dwarf register operation. -static void emitDwarfRegOp(ByteStreamer &Streamer, int Reg) { - assert(Reg >= 0); - if (Reg < 32) { - Streamer.EmitInt8(dwarf::DW_OP_reg0 + Reg, - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_regx, "DW_OP_regx"); - Streamer.EmitULEB128(Reg, Twine(Reg)); - } -} - -/// Emit an (double-)indirect dwarf register operation. -static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, - bool Deref) { - assert(Reg >= 0); - if (Reg < 32) { - Streamer.EmitInt8(dwarf::DW_OP_breg0 + Reg, - dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_bregx, "DW_OP_bregx"); - Streamer.EmitULEB128(Reg, Twine(Reg)); - } - Streamer.EmitSLEB128(Offset); - if (Deref) - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); -} - -/// Emit a dwarf register operation for describing -/// - a small value occupying only part of a register or -/// - a small register representing only part of a value. -static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, - unsigned OffsetInBits) { - assert(SizeInBits > 0 && "zero-sized piece"); - unsigned SizeOfByte = 8; - if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { - Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); - Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits)); - Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits)); - } else { - Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece"); - unsigned ByteSize = SizeInBits / SizeOfByte; - Streamer.EmitULEB128(ByteSize, Twine(ByteSize)); - } -} - -/// Emit a shift-right dwarf expression. -static void emitDwarfOpShr(ByteStreamer &Streamer, - unsigned ShiftBy) { - Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); - Streamer.EmitULEB128(ShiftBy); - Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr"); -} - // Some targets do not provide a DWARF register number for every // register. This function attempts to emit a DWARF register by // emitting a piece of a super-register or by piecing together @@ -249,112 +215,47 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) const { assert(MLoc.isReg() && "MLoc must be a register"); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); - - // If this is a valid register number, emit it. - if (Reg >= 0) { - emitDwarfRegOp(Streamer, Reg); - emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); - return; - } - - // Walk up the super-register chain until we find a valid number. - // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. 
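The deleted static helpers above hand-encoded the two DWARF register forms; the same rule now lives in the DwarfExpression classes this patch introduces. A self-contained sketch of the encoding with the standard opcode values written out (the raw byte-vector interface is an assumption of this sketch, not ByteStreamer's API):

```cpp
#include <cstdint>
#include <vector>

// DWARF gives registers 0-31 a compact one-byte DW_OP_reg0+N form;
// larger numbers pay for DW_OP_regx plus a ULEB128-encoded register
// number, which is why the helper branches on Reg < 32.
static const uint8_t DW_OP_reg0 = 0x50;
static const uint8_t DW_OP_regx = 0x90;

static void emitULEB128(std::vector<uint8_t> &Out, uint64_t Value) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80; // continuation bit: more groups follow
    Out.push_back(Byte);
  } while (Value);
}

void emitDwarfRegOp(std::vector<uint8_t> &Out, unsigned Reg) {
  if (Reg < 32) {
    Out.push_back(DW_OP_reg0 + Reg);
  } else {
    Out.push_back(DW_OP_regx);
    emitULEB128(Out, Reg);
  }
}
```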
- for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { - Reg = TRI->getDwarfRegNum(*SR, false); - if (Reg >= 0) { - unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - OutStreamer.AddComment("super-register"); - emitDwarfRegOp(Streamer, Reg); - if (PieceOffsetInBits == Offset) { - emitDwarfOpPiece(Streamer, Size, Offset); - } else { - // If this is part of a variable in a sub-register at a - // non-zero offset, we need to manually shift the value into - // place, since the DW_OP_piece describes the part of the - // variable, not the position of the subregister. - emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); - if (Offset) - emitDwarfOpShr(Streamer, Offset); - } - return; - } - } - - // Otherwise, attempt to find a covering set of sub-register numbers. - // For example, Q0 on ARM is a composition of D0+D1. - // - // Keep track of the current position so we can emit the more - // efficient DW_OP_piece. - unsigned CurPos = PieceOffsetInBits; - // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8; - // Keep track of the bits in the register we already emitted, so we - // can avoid emitting redundant aliasing subregs. - SmallBitVector Coverage(RegSize, false); - for (MCSubRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { - unsigned Idx = TRI->getSubRegIndex(MLoc.getReg(), *SR); - unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); - Reg = TRI->getDwarfRegNum(*SR, false); - - // Intersection between the bits we already emitted and the bits - // covered by this subregister. - SmallBitVector Intersection(RegSize, false); - Intersection.set(Offset, Offset + Size); - Intersection ^= Coverage; - - // If this sub-register has a DWARF number and we haven't covered - // its range, emit a DWARF piece for it. - if (Reg >= 0 && Intersection.any()) { - OutStreamer.AddComment("sub-register"); - emitDwarfRegOp(Streamer, Reg); - emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); - CurPos = Offset + Size; - - // Mark it as emitted. - Coverage.set(Offset, Offset + Size); - } - } + DebugLocDwarfExpression Expr(*this, Streamer); + Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits); +} - if (CurPos == PieceOffsetInBits) { - // FIXME: We have no reasonable way of handling errors in here. - Streamer.EmitInt8(dwarf::DW_OP_nop, - "nop (could not find a dwarf register number)"); - } +void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) const { + DebugLocDwarfExpression Expr(*this, Streamer); + Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits); } /// EmitDwarfRegOp - Emit dwarf register operation. void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, const MachineLocation &MLoc, bool Indirect) const { - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + DebugLocDwarfExpression Expr(*this, Streamer); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { // We assume that pointers are always in an addressable register. - if (Indirect || MLoc.isIndirect()) { + if (Indirect || MLoc.isIndirect()) // FIXME: We have no reasonable way of handling errors in here. The // caller might be in the middle of a dwarf expression. 
We should // probably assert that Reg >= 0 once debug info generation is more // mature. - Streamer.EmitInt8(dwarf::DW_OP_nop, - "nop (invalid dwarf register number for indirect loc)"); - return; - } + return Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); // Attempt to find a valid super- or sub-register. - return EmitDwarfRegOpPiece(Streamer, MLoc); + if (!Expr.AddMachineRegPiece(MLoc.getReg())) + Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + return; } if (MLoc.isIndirect()) - emitDwarfRegOpIndirect(Streamer, Reg, MLoc.getOffset(), Indirect); + Expr.AddRegIndirect(Reg, MLoc.getOffset(), Indirect); else if (Indirect) - emitDwarfRegOpIndirect(Streamer, Reg, 0, false); + Expr.AddRegIndirect(Reg, 0, false); else - emitDwarfRegOp(Streamer, Reg); + Expr.AddReg(Reg); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 2825367abd8f..31867dd01d05 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ -#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H #include "llvm/Support/DataTypes.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 46ee0c856bda..f6ce4a08db5b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -63,7 +64,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { if (LocInfo->getNumOperands() != 0) if (const ConstantInt *CI = - dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine))) + mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) LocCookie = CI->getZExtValue(); } @@ -89,6 +90,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, assert(MCAI && "No MCAsmInfo"); if (!MCAI->useIntegratedAssembler() && !OutStreamer.isIntegratedAssemblerRequired()) { + emitInlineAsmStart(TM.getSubtarget<MCSubtargetInfo>()); OutStreamer.EmitRawText(Str); emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr); return; @@ -110,14 +112,14 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, HasDiagHandler = true; } - MemoryBuffer *Buffer; + std::unique_ptr<MemoryBuffer> Buffer; if (isNullTerminated) Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>"); else Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>"); // Tell SrcMgr about this buffer, it takes ownership of the buffer. 
- SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); std::unique_ptr<MCAsmParser> Parser( createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); @@ -146,7 +148,12 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + if (MF) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); + } + emitInlineAsmStart(STIOrig); // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); @@ -462,7 +469,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (MI->getOperand(i-1).isMetadata() && (LocMD = MI->getOperand(i-1).getMetadata()) && LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { LocCookie = CI->getZExtValue(); break; } @@ -500,7 +508,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); if (!strcmp(Code, "private")) { OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { @@ -561,5 +569,7 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } +void AsmPrinter::emitInlineAsmStart(const MCSubtargetInfo &StartInfo) const {} + void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, const MCSubtargetInfo *EndInfo) const {} diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 6c01d65cc1aa..42be1149e973 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_BYTESTREAMER_H -#define LLVM_CODEGEN_BYTESTREAMER_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#include "DIEHash.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" -#include "DIEHash.h" namespace llvm { class ByteStreamer { diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index f555f212a978..01d2c7220ab9 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -9,7 +9,9 @@ add_llvm_library(LLVMAsmPrinter DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp + DwarfExpression.cpp DwarfFile.cpp DwarfStringPool.cpp DwarfUnit.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index c3dcd9c8c19d..64ba56bdacf7 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -11,13 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "DIE.h" +#include "llvm/CodeGen/DIE.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include 
"llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" @@ -370,6 +372,29 @@ void DIEString::print(raw_ostream &O) const { // DIEEntry Implementation //===----------------------------------------------------------------------===// +/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the +/// directive is specified by Size and Hi/Lo specify the labels. +static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi, + uint64_t Offset, const MCSymbol *Lo, + unsigned Size) { + MCContext &Context = Streamer.getContext(); + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context), + MCConstantExpr::Create(Offset, Context), Context); + + // Get the Hi+Offset-Lo expression. + const MCExpr *Diff = MCBinaryExpr::CreateSub( + Plus, MCSymbolRefExpr::Create(Lo, Context), Context); + + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = Context.CreateTempSymbol(); + Streamer.EmitAssignment(SetLabel, Diff); + Streamer.EmitSymbolValue(SetLabel, Size); +} + /// EmitValue - Emit debug information entry offset. /// void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { @@ -388,9 +413,9 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, DIEEntry::getRefAddrSize(AP)); else - AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr, - CU->getSectionSym(), - DIEEntry::getRefAddrSize(AP)); + emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr, + CU->getSectionSym(), + DIEEntry::getRefAddrSize(AP)); } else AP->EmitInt32(Entry.getOffset()); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h deleted file mode 100644 index ef05f1707810..000000000000 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ /dev/null @@ -1,587 +0,0 @@ -//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Data structures for DWARF info entries. -// -//===----------------------------------------------------------------------===// - -#ifndef CODEGEN_ASMPRINTER_DIE_H__ -#define CODEGEN_ASMPRINTER_DIE_H__ - -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Dwarf.h" -#include <vector> - -namespace llvm { -class AsmPrinter; -class MCExpr; -class MCSymbol; -class raw_ostream; -class DwarfTypeUnit; - -//===--------------------------------------------------------------------===// -/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a -/// Dwarf abbreviation. -class DIEAbbrevData { - /// Attribute - Dwarf attribute code. - /// - dwarf::Attribute Attribute; - - /// Form - Dwarf form code. - /// - dwarf::Form Form; - -public: - DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {} - - // Accessors. - dwarf::Attribute getAttribute() const { return Attribute; } - dwarf::Form getForm() const { return Form; } - - /// Profile - Used to gather unique data for the abbreviation folding set. 
- /// - void Profile(FoldingSetNodeID &ID) const; -}; - -//===--------------------------------------------------------------------===// -/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug -/// information object. -class DIEAbbrev : public FoldingSetNode { - /// Unique number for node. - /// - unsigned Number; - - /// Tag - Dwarf tag code. - /// - dwarf::Tag Tag; - - /// Children - Whether or not this node has children. - /// - // This cheats a bit in all of the uses since the values in the standard - // are 0 and 1 for no children and children respectively. - bool Children; - - /// Data - Raw data bytes for abbreviation. - /// - SmallVector<DIEAbbrevData, 12> Data; - -public: - DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {} - - // Accessors. - dwarf::Tag getTag() const { return Tag; } - unsigned getNumber() const { return Number; } - bool hasChildren() const { return Children; } - const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } - void setChildrenFlag(bool hasChild) { Children = hasChild; } - void setNumber(unsigned N) { Number = N; } - - /// AddAttribute - Adds another set of attribute information to the - /// abbreviation. - void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) { - Data.push_back(DIEAbbrevData(Attribute, Form)); - } - - /// Profile - Used to gather unique data for the abbreviation folding set. - /// - void Profile(FoldingSetNodeID &ID) const; - - /// Emit - Print the abbreviation using the specified asm printer. - /// - void Emit(AsmPrinter *AP) const; - -#ifndef NDEBUG - void print(raw_ostream &O); - void dump(); -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIE - A structured debug information entry. Has an abbreviation which -/// describes its organization. -class DIEValue; - -class DIE { -protected: - /// Offset - Offset in debug info section. - /// - unsigned Offset; - - /// Size - Size of instance + children. - /// - unsigned Size; - - /// Abbrev - Buffer for constructing abbreviation. - /// - DIEAbbrev Abbrev; - - /// Children DIEs. - /// - // This can't be a vector<DIE> because pointer validity is requirent for the - // Parent pointer and DIEEntry. - // It can't be a list<DIE> because some clients need pointer validity before - // the object has been added to any child list - // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may - // be more convoluted than beneficial. - std::vector<std::unique_ptr<DIE>> Children; - - DIE *Parent; - - /// Attribute values. - /// - SmallVector<DIEValue *, 12> Values; - -protected: - DIE() - : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no), - Parent(nullptr) {} - -public: - explicit DIE(dwarf::Tag Tag) - : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), - Parent(nullptr) {} - - // Accessors. - DIEAbbrev &getAbbrev() { return Abbrev; } - const DIEAbbrev &getAbbrev() const { return Abbrev; } - unsigned getAbbrevNumber() const { return Abbrev.getNumber(); } - dwarf::Tag getTag() const { return Abbrev.getTag(); } - unsigned getOffset() const { return Offset; } - unsigned getSize() const { return Size; } - const std::vector<std::unique_ptr<DIE>> &getChildren() const { - return Children; - } - const SmallVectorImpl<DIEValue *> &getValues() const { return Values; } - DIE *getParent() const { return Parent; } - /// Climb up the parent chain to get the compile or type unit DIE this DIE - /// belongs to. 
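As the removed header's own comment notes, DIE keeps its children in std::vector<std::unique_ptr<DIE>> because Parent pointers and DIEEntry references need node addresses that never move. A minimal sketch of that ownership wiring (Node is a stand-in for DIE, not the real class):

```cpp
#include <cassert>
#include <memory>
#include <vector>

// Ownership lives in the parent's vector of unique_ptr; each child
// keeps a raw back-pointer, which stays valid because the heap nodes
// themselves never relocate.
struct Node {
  Node *Parent = nullptr;
  std::vector<std::unique_ptr<Node>> Children;

  void addChild(std::unique_ptr<Node> Child) {
    assert(!Child->Parent && "node already has a parent");
    Child->Parent = this;
    Children.push_back(std::move(Child));
  }
};
```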
- const DIE *getUnit() const; - /// Similar to getUnit, returns null when DIE is not added to an - /// owner yet. - const DIE *getUnitOrNull() const; - void setOffset(unsigned O) { Offset = O; } - void setSize(unsigned S) { Size = S; } - - /// addValue - Add a value and attributes to a DIE. - /// - void addValue(dwarf::Attribute Attribute, dwarf::Form Form, DIEValue *Value) { - Abbrev.AddAttribute(Attribute, Form); - Values.push_back(Value); - } - - /// addChild - Add a child to the DIE. - /// - void addChild(std::unique_ptr<DIE> Child) { - assert(!Child->getParent()); - Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); - Child->Parent = this; - Children.push_back(std::move(Child)); - } - - /// findAttribute - Find a value in the DIE with the attribute given, - /// returns NULL if no such attribute exists. - DIEValue *findAttribute(dwarf::Attribute Attribute) const; - -#ifndef NDEBUG - void print(raw_ostream &O, unsigned IndentCount = 0) const; - void dump(); -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEValue - A debug information entry value. Some of these roughly correlate -/// to DWARF attribute classes. -/// -class DIEValue { - virtual void anchor(); - -public: - enum Type { - isInteger, - isString, - isExpr, - isLabel, - isDelta, - isEntry, - isTypeSignature, - isBlock, - isLoc, - isLocList, - }; - -protected: - /// Ty - Type of data stored in the value. - /// - Type Ty; - - explicit DIEValue(Type T) : Ty(T) {} - virtual ~DIEValue() {} - -public: - // Accessors - Type getType() const { return Ty; } - - /// EmitValue - Emit value via the Dwarf writer. - /// - virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0; - - /// SizeOf - Return the size of a value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0; - -#ifndef NDEBUG - virtual void print(raw_ostream &O) const = 0; - void dump() const; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEInteger - An integer value DIE. -/// -class DIEInteger : public DIEValue { - uint64_t Integer; - -public: - explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} - - /// BestForm - Choose the best form for integer. - /// - static dwarf::Form BestForm(bool IsSigned, uint64_t Int) { - if (IsSigned) { - const int64_t SignedInt = Int; - if ((char)Int == SignedInt) - return dwarf::DW_FORM_data1; - if ((short)Int == SignedInt) - return dwarf::DW_FORM_data2; - if ((int)Int == SignedInt) - return dwarf::DW_FORM_data4; - } else { - if ((unsigned char)Int == Int) - return dwarf::DW_FORM_data1; - if ((unsigned short)Int == Int) - return dwarf::DW_FORM_data2; - if ((unsigned int)Int == Int) - return dwarf::DW_FORM_data4; - } - return dwarf::DW_FORM_data8; - } - - /// EmitValue - Emit integer of appropriate size. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - uint64_t getValue() const { return Integer; } - - /// SizeOf - Determine size of integer value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *I) { return I->getType() == isInteger; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEExpr - An expression DIE. 
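DIEInteger::BestForm above picks the narrowest DW_FORM_dataN that round-trips the value; the signed and unsigned cases differ because a truncation that is lossless for an unsigned value may sign-extend differently. An equivalent sketch with fixed-width casts (the Form enum is a placeholder for the dwarf::DW_FORM_data* constants):

```cpp
#include <cstdint>

enum class Form { Data1, Data2, Data4, Data8 };

// Choose the smallest fixed-size data form that preserves the value.
Form bestForm(bool IsSigned, uint64_t Int) {
  if (IsSigned) {
    int64_t S = static_cast<int64_t>(Int);
    if (static_cast<int8_t>(S) == S)  return Form::Data1;
    if (static_cast<int16_t>(S) == S) return Form::Data2;
    if (static_cast<int32_t>(S) == S) return Form::Data4;
  } else {
    if (static_cast<uint8_t>(Int) == Int)  return Form::Data1;
    if (static_cast<uint16_t>(Int) == Int) return Form::Data2;
    if (static_cast<uint32_t>(Int) == Int) return Form::Data4;
  }
  return Form::Data8;
}
```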
-// -class DIEExpr : public DIEValue { - const MCExpr *Expr; - -public: - explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {} - - /// EmitValue - Emit expression value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// getValue - Get MCExpr. - /// - const MCExpr *getValue() const { return Expr; } - - /// SizeOf - Determine size of expression value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isExpr; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELabel - A label DIE. -// -class DIELabel : public DIEValue { - const MCSymbol *Label; - -public: - explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {} - - /// EmitValue - Emit label value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// getValue - Get MCSymbol. - /// - const MCSymbol *getValue() const { return Label; } - - /// SizeOf - Determine size of label value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *L) { return L->getType() == isLabel; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEDelta - A simple label difference DIE. -/// -class DIEDelta : public DIEValue { - const MCSymbol *LabelHi; - const MCSymbol *LabelLo; - -public: - DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo) - : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {} - - /// EmitValue - Emit delta value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of delta value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isDelta; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEString - A container for string values. -/// -class DIEString : public DIEValue { - const DIEValue *Access; - const StringRef Str; - -public: - DIEString(const DIEValue *Acc, const StringRef S) - : DIEValue(isString), Access(Acc), Str(S) {} - - /// getString - Grab the string out of the object. - StringRef getString() const { return Str; } - - /// EmitValue - Emit delta value. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of delta value in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *D) { return D->getType() == isString; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEEntry - A pointer to another debug information entry. An instance of -/// this class can also be used as a proxy for a debug information entry not -/// yet defined (ie. types.) 
-class DIEEntry : public DIEValue { - DIE &Entry; - -public: - explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) { - } - - DIE &getEntry() const { return Entry; } - - /// EmitValue - Emit debug information entry offset. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of debug information entry in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { - return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP) - : sizeof(int32_t); - } - - /// Returns size of a ref_addr entry. - static unsigned getRefAddrSize(AsmPrinter *AP); - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isEntry; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// \brief A signature reference to a type unit. -class DIETypeSignature : public DIEValue { - const DwarfTypeUnit &Unit; - -public: - explicit DIETypeSignature(const DwarfTypeUnit &Unit) - : DIEValue(isTypeSignature), Unit(Unit) {} - - /// \brief Emit type unit signature. - void EmitValue(AsmPrinter *Asm, dwarf::Form Form) const override; - - /// Returns size of a ref_sig8 entry. - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override { - assert(Form == dwarf::DW_FORM_ref_sig8); - return 8; - } - - // \brief Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { - return E->getType() == isTypeSignature; - } -#ifndef NDEBUG - void print(raw_ostream &O) const override; - void dump() const; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELoc - Represents an expression location. -// -class DIELoc : public DIEValue, public DIE { - mutable unsigned Size; // Size in bytes excluding size header. -public: - DIELoc() : DIEValue(isLoc), Size(0) {} - - /// ComputeSize - Calculate the size of the location expression. - /// - unsigned ComputeSize(AsmPrinter *AP) const; - - /// BestForm - Choose the best form for data. - /// - dwarf::Form BestForm(unsigned DwarfVersion) const { - if (DwarfVersion > 3) - return dwarf::DW_FORM_exprloc; - // Pre-DWARF4 location expressions were blocks and not exprloc. - if ((unsigned char)Size == Size) - return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) - return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) - return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isLoc; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIEBlock - Represents a block of values. -// -class DIEBlock : public DIEValue, public DIE { - mutable unsigned Size; // Size in bytes excluding size header. -public: - DIEBlock() : DIEValue(isBlock), Size(0) {} - - /// ComputeSize - Calculate the size of the location expression. - /// - unsigned ComputeSize(AsmPrinter *AP) const; - - /// BestForm - Choose the best form for data. 
- /// - dwarf::Form BestForm() const { - if ((unsigned char)Size == Size) - return dwarf::DW_FORM_block1; - if ((unsigned short)Size == Size) - return dwarf::DW_FORM_block2; - if ((unsigned int)Size == Size) - return dwarf::DW_FORM_block4; - return dwarf::DW_FORM_block; - } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isBlock; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -//===--------------------------------------------------------------------===// -/// DIELocList - Represents a pointer to a location list in the debug_loc -/// section. -// -class DIELocList : public DIEValue { - // Index into the .debug_loc vector. - size_t Index; - -public: - DIELocList(size_t I) : DIEValue(isLocList), Index(I) {} - - /// getValue - Grab the current index out. - size_t getValue() const { return Index; } - - /// EmitValue - Emit location data. - /// - void EmitValue(AsmPrinter *AP, dwarf::Form Form) const override; - - /// SizeOf - Determine size of location data in bytes. - /// - unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const override; - - // Implement isa/cast/dyncast. - static bool classof(const DIEValue *E) { return E->getType() == isLocList; } - -#ifndef NDEBUG - void print(raw_ostream &O) const override; -#endif -}; - -} // end llvm namespace - -#endif diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index c2fad59aa4e1..1e2ba2ccb9a4 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -13,11 +13,11 @@ #include "ByteStreamer.h" #include "DIEHash.h" -#include "DIE.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" @@ -261,7 +261,7 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, return; } - // otherwise, b) use the letter 'T' as a the marker, ... + // otherwise, b) use the letter 'T' as the marker, ... 
addULEB128('T'); addULEB128(Attribute); diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index 175d66063d49..ac014b727b75 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__ -#define CODEGEN_ASMPRINTER_DIEHASH_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H -#include "DIE.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/Support/MD5.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index a66d08e501ac..0c2a5e551ad0 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -8,25 +8,25 @@ //===----------------------------------------------------------------------===// #include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <map> -#include <set> +using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -namespace llvm { - // \brief If @MI is a DBG_VALUE with debug value described by a // defined register, returns the number of this register. // In the other case, returns 0. static unsigned isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); - assert(MI.getNumOperands() == 3); + assert(MI.getNumOperands() == 4); // If location of variable is described using a register (directly or // indirecltly), this register is always a first operand. return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; @@ -36,7 +36,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. - assert(MI.isDebugValue() && MI.getDebugVariable() == Var); + assert(MI.isDebugValue() && "not a DBG_VALUE"); auto &Ranges = VarInstrRanges[Var]; if (!Ranges.empty() && Ranges.back().second == nullptr && Ranges.back().first->isIdenticalTo(&MI)) { @@ -96,6 +96,19 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, VarSet.push_back(Var); } +// \brief Terminate the location range for variables described by register at +// @I by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + RegDescribedVarsMap::iterator I, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.erase(I); +} + // \brief Terminate the location range for variables described by register // @RegNo by inserting @ClobberingInstr to their history. static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, @@ -104,22 +117,26 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, const auto &I = RegVars.find(RegNo); if (I == RegVars.end()) return; - // Iterate over all variables described by this register and add this - // instruction to their history, clobbering it. 
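The startInstrRange/endInstrRange pair above maintains, per variable, a list of [begin, end) instruction ranges in which a DBG_VALUE location holds, with a null end meaning the range is still open; clobbering a register closes every open range of the variables that register currently describes. A reduced sketch of that bookkeeping (Instr and the integer variable key are stand-ins for MachineInstr and the MDNode* key):

```cpp
#include <cassert>
#include <map>
#include <utility>
#include <vector>

struct Instr; // stand-in for MachineInstr

using InstrRange = std::pair<const Instr *, const Instr *>;
using HistoryMap = std::map<int /*VarID*/, std::vector<InstrRange>>;

// Open a new range; it stays live until explicitly clobbered.
void startRange(HistoryMap &Hist, int Var, const Instr *Begin) {
  Hist[Var].push_back({Begin, nullptr});
}

// Close the most recent open range at the clobbering instruction.
void endRange(HistoryMap &Hist, int Var, const Instr *End) {
  auto &Ranges = Hist[Var];
  assert(!Ranges.empty() && !Ranges.back().second && "no open range");
  Ranges.back().second = End;
}
```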
- for (const auto &Var : I->second) - HistMap.endInstrRange(Var, ClobberingInstr); - RegVars.erase(I); + clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); } -// \brief Collect all registers clobbered by @MI and insert them to @Regs. -static void collectClobberedRegisters(const MachineInstr &MI, +// \brief Collect all registers clobbered by @MI and apply the functor +// @Func to their RegNo. +// @Func should be a functor with a void(unsigned) signature. We're +// not using std::function here for performance reasons. It has a +// small but measurable impact. By using a functor instead of a +// std::set& here, we can avoid the overhead of constructing +// temporaries in calculateDbgValueHistory, which has a significant +// performance impact. +template<typename Callable> +static void applyToClobberedRegisters(const MachineInstr &MI, const TargetRegisterInfo *TRI, - std::set<unsigned> &Regs) { + Callable Func) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Regs.insert(*AI); + Func(*AI); } } @@ -133,11 +150,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { // as the return instruction. DebugLoc LastLoc = LastMI->getDebugLoc(); auto Res = LastMI; - for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend(); - ++I) { + for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)), + E = MBB.rend(); + I != E; ++I) { if (I->getDebugLoc() != LastLoc) return Res; - Res = std::prev(I.base()); + Res = &*I; } // If all instructions have the same debug location, assume whole MBB is // an epilogue. @@ -145,25 +163,26 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { } // \brief Collect registers that are modified in the function body (their -// contents is changed only in the prologue and epilogue). +// contents is changed outside of the prologue and epilogue). static void collectChangingRegs(const MachineFunction *MF, const TargetRegisterInfo *TRI, - std::set<unsigned> &Regs) { + BitVector &Regs) { for (const auto &MBB : *MF) { auto FirstEpilogueInst = getFirstEpilogueInst(MBB); - bool IsInEpilogue = false; + for (const auto &MI : MBB) { - IsInEpilogue |= &MI == FirstEpilogueInst; - if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue) - collectClobberedRegisters(MI, TRI, Regs); + if (&MI == FirstEpilogueInst) + break; + if (!MI.getFlag(MachineInstr::FrameSetup)) + applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); }); } } } -void calculateDbgValueHistory(const MachineFunction *MF, - const TargetRegisterInfo *TRI, - DbgValueHistoryMap &Result) { - std::set<unsigned> ChangingRegs; +void llvm::calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result) { + BitVector ChangingRegs(TRI->getNumRegs()); collectChangingRegs(MF, TRI, ChangingRegs); RegDescribedVarsMap RegVars; @@ -172,17 +191,18 @@ void calculateDbgValueHistory(const MachineFunction *MF, if (!MI.isDebugValue()) { // Not a DBG_VALUE instruction. It may clobber registers which describe // some variables. 
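The applyToClobberedRegisters rewrite above deliberately takes the callback as a template parameter rather than std::function; per the patch's own comment, the type-erasure cost of std::function is small but measurable on this hot path, while a template lets the lambda inline away. The shape of the pattern in isolation:

```cpp
#include <cstdio>

// Callable is deduced per call site, so the lambda below is invoked
// directly with no virtual dispatch or heap allocation.
template <typename Callable>
void forEachClobberedReg(const unsigned *Regs, unsigned N, Callable Func) {
  for (unsigned i = 0; i != N; ++i)
    Func(Regs[i]);
}

int main() {
  unsigned Clobbers[] = {3, 7, 42};
  forEachClobberedReg(Clobbers, 3,
                      [](unsigned R) { std::printf("clobber r%u\n", R); });
}
```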
- std::set<unsigned> MIClobberedRegs; - collectClobberedRegisters(MI, TRI, MIClobberedRegs); - for (unsigned RegNo : MIClobberedRegs) { - if (ChangingRegs.count(RegNo)) + applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) { + if (ChangingRegs.test(RegNo)) clobberRegisterUses(RegVars, RegNo, Result, MI); - } + }); continue; } assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); - const MDNode *Var = MI.getDebugVariable(); + // Use the base variable (without any DW_OP_piece expressions) + // as index into History. The full variables including the + // piece expressions are attached to the MI. + DIVariable Var = MI.getDebugVariable(); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); @@ -196,11 +216,12 @@ void calculateDbgValueHistory(const MachineFunction *MF, // Make sure locations for register-described variables are valid only // until the end of the basic block (unless it's the last basic block, in // which case let their liveness run off to the end of the function). - if (!MBB.empty() && &MBB != &MF->back()) { - for (unsigned RegNo : ChangingRegs) - clobberRegisterUses(RegVars, RegNo, Result, MBB.back()); + if (!MBB.empty() && &MBB != &MF->back()) { + for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { + auto CurElem = I++; // CurElem can be erased below. + if (ChangingRegs.test(CurElem->first)) + clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); + } } } } - -} diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index b9177f05950e..4b6200717d26 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ -#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" @@ -28,9 +28,11 @@ class DbgValueHistoryMap { // range. If end is not specified, location is valid until the start // instruction of the next instruction range, or until the end of the // function. +public: typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; typedef SmallVector<InstrRange, 4> InstrRanges; typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap; +private: InstrRangesMap VarInstrRanges; public: diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 3beb799203b7..b4fcada59ecc 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ -#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #include "llvm/IR/Constants.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" namespace llvm { -class DwarfCompileUnit; class MDNode; /// \brief This struct describes location entries emitted in the .debug_loc /// section. @@ -26,25 +26,30 @@ class DebugLocEntry { public: /// A single location or constant. 
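The same hunks swap std::set<unsigned> for a BitVector sized to TRI->getNumRegs(): physical register numbers are small and dense, so a flat bit array gives allocation-free O(1) test and set. A sketch of the idea (llvm::BitVector provides this in-tree; DenseRegSet here is only illustrative):

```cpp
#include <vector>

// One bit per physical register; no per-insert allocation, constant
// time membership tests, cheap to clear and reuse.
class DenseRegSet {
  std::vector<bool> Bits;
public:
  explicit DenseRegSet(unsigned NumRegs) : Bits(NumRegs, false) {}
  void set(unsigned Reg) { Bits[Reg] = true; }
  bool test(unsigned Reg) const { return Bits[Reg]; }
};
```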
struct Value { - Value(const MDNode *Var, int64_t i) - : Variable(Var), EntryKind(E_Integer) { + Value(const MDNode *Var, const MDNode *Expr, int64_t i) + : Variable(Var), Expression(Expr), EntryKind(E_Integer) { Constant.Int = i; } - Value(const MDNode *Var, const ConstantFP *CFP) - : Variable(Var), EntryKind(E_ConstantFP) { + Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP) + : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) { Constant.CFP = CFP; } - Value(const MDNode *Var, const ConstantInt *CIP) - : Variable(Var), EntryKind(E_ConstantInt) { + Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP) + : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) { Constant.CIP = CIP; } - Value(const MDNode *Var, MachineLocation Loc) - : Variable(Var), EntryKind(E_Location), Loc(Loc) { + Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) + : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(DIVariable(Var).Verify()); + assert(DIExpression(Expr).Verify()); } // The variable to which this location entry corresponds. const MDNode *Variable; + // Any complex address location expression for this Value. + const MDNode *Expression; + // Type of entry that this represents. enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; enum EntryType EntryKind; @@ -59,23 +64,6 @@ public: // Or a location in the machine frame. MachineLocation Loc; - bool operator==(const Value &other) const { - if (EntryKind != other.EntryKind) - return false; - - switch (EntryKind) { - case E_Location: - return Loc == other.Loc; - case E_Integer: - return Constant.Int == other.Constant.Int; - case E_ConstantFP: - return Constant.CFP == other.Constant.CFP; - case E_ConstantInt: - return Constant.CIP == other.Constant.CIP; - } - llvm_unreachable("unhandled EntryKind"); - } - bool isLocation() const { return EntryKind == E_Location; } bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } @@ -84,40 +72,114 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariable() const { return Variable; } + const MDNode *getVariableNode() const { return Variable; } + DIVariable getVariable() const { return DIVariable(Variable); } + bool isVariablePiece() const { return getExpression().isVariablePiece(); } + DIExpression getExpression() const { return DIExpression(Expression); } + friend bool operator==(const Value &, const Value &); + friend bool operator<(const Value &, const Value &); }; + private: - /// A list of locations/constants belonging to this entry. + /// A nonempty list of locations/constants belonging to this entry, + /// sorted by offset. SmallVector<Value, 1> Values; - /// The compile unit that this location entry is referenced by. - const DwarfCompileUnit *Unit; - public: - DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {} - DebugLocEntry(const MCSymbol *B, const MCSymbol *E, - Value Val, const DwarfCompileUnit *U) - : Begin(B), End(E), Unit(U) { + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val) + : Begin(B), End(E) { Values.push_back(std::move(Val)); } + /// \brief If this and Next are describing different pieces of the same + // variable, merge them by appending Next's values to the current + // list of values. + // Return true if the merge was successful. 
+ bool MergeValues(const DebugLocEntry &Next) { + if (Begin == Next.Begin) { + DIExpression Expr(Values[0].Expression); + DIVariable Var(Values[0].Variable); + DIExpression NextExpr(Next.Values[0].Expression); + DIVariable NextVar(Next.Values[0].Variable); + if (Var == NextVar && Expr.isVariablePiece() && + NextExpr.isVariablePiece()) { + addValues(Next.Values); + End = Next.End; + return true; + } + } + return false; + } + /// \brief Attempt to merge this DebugLocEntry with Next and return /// true if the merge was successful. Entries can be merged if they /// share the same Loc/Constant and if Next immediately follows this /// Entry. - bool Merge(const DebugLocEntry &Next) { + bool MergeRanges(const DebugLocEntry &Next) { + // If this and Next are describing the same variable, merge them. if ((End == Next.Begin && Values == Next.Values)) { End = Next.End; return true; } return false; } + const MCSymbol *getBeginSym() const { return Begin; } const MCSymbol *getEndSym() const { return End; } - const DwarfCompileUnit *getCU() const { return Unit; } - const ArrayRef<Value> getValues() const { return Values; } - void addValue(Value Val) { Values.push_back(Val); } + ArrayRef<Value> getValues() const { return Values; } + void addValues(ArrayRef<DebugLocEntry::Value> Vals) { + Values.append(Vals.begin(), Vals.end()); + sortUniqueValues(); + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ + return V.isVariablePiece(); + }) && "value must be a piece"); + } + + // Sort the pieces by offset. + // Remove any duplicate entries by dropping all but the first. + void sortUniqueValues() { + std::sort(Values.begin(), Values.end()); + Values.erase(std::unique(Values.begin(), Values.end(), + [](const Value &A, const Value &B) { + return A.getVariable() == B.getVariable() && + A.getExpression() == B.getExpression(); + }), + Values.end()); + } }; +/// Compare two Values for equality. +inline bool operator==(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + if (A.EntryKind != B.EntryKind) + return false; + + if (A.Expression != B.Expression) + return false; + + if (A.Variable != B.Variable) + return false; + + switch (A.EntryKind) { + case DebugLocEntry::Value::E_Location: + return A.Loc == B.Loc; + case DebugLocEntry::Value::E_Integer: + return A.Constant.Int == B.Constant.Int; + case DebugLocEntry::Value::E_ConstantFP: + return A.Constant.CFP == B.Constant.CFP; + case DebugLocEntry::Value::E_ConstantInt: + return A.Constant.CIP == B.Constant.CIP; + } + llvm_unreachable("unhandled EntryKind"); +} + +/// Compare two pieces based on their offset. 
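sortUniqueValues above is the classic sort + unique + erase idiom: order the pieces by offset, then drop all but the first of any duplicates. In the real code the duplicate key is the variable plus its expression; this sketch reduces the key to the offset alone:

```cpp
#include <algorithm>
#include <vector>

struct Piece {
  unsigned Offset; // bit offset of this piece within the variable
  int Payload;     // stand-in for the location/constant value
};

void sortUnique(std::vector<Piece> &Pieces) {
  std::sort(Pieces.begin(), Pieces.end(),
            [](const Piece &A, const Piece &B) { return A.Offset < B.Offset; });
  // std::unique keeps the first element of each run of duplicates.
  Pieces.erase(std::unique(Pieces.begin(), Pieces.end(),
                           [](const Piece &A, const Piece &B) {
                             return A.Offset == B.Offset;
                           }),
               Pieces.end());
}
```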
+inline bool operator<(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + return A.getExpression().getPieceOffset() < + B.getExpression().getPieceOffset(); } + +} + #endif diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h index 7a51c7bd5b0c..0f1d2edd425c 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocList.h +++ b/lib/CodeGen/AsmPrinter/DebugLocList.h @@ -7,16 +7,18 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ -#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H -#include "llvm/MC/MCSymbol.h" -#include "llvm/ADT/SmallVector.h" #include "DebugLocEntry.h" +#include "llvm/ADT/SmallVector.h" namespace llvm { +class DwarfCompileUnit; +class MCSymbol; struct DebugLocList { MCSymbol *Label; + DwarfCompileUnit *CU; SmallVector<DebugLocEntry, 4> List; }; } diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index e9527c458af3..a71f35e1232d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -12,11 +12,12 @@ //===----------------------------------------------------------------------===// #include "DwarfAccelTable.h" -#include "DIE.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -174,7 +175,8 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, + MCSymbol *StrSym) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), @@ -183,13 +185,14 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset((*HI)->Data.StrSym, - D->getStringPool().getSectionSymbol()); + Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset - Asm->EmitInt32(HD->Die->getOffset()); + DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit()); + assert(CU && "Accelerated DIE should belong to a CU."); + Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset()); // If we have multiple Atoms emit that info too. // FIXME: A bit of a hack, we either emit only one atom or all info. if (HeaderData.Atoms.size() > 1) { @@ -206,7 +209,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, + MCSymbol *StrSym) { // Emit the header. 
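The one functional change in the DwarfAccelTable::EmitData hunk below is offset rebasing: a DIE's stored offset is relative to its unit, but the accelerator table needs a .debug_info-section-relative value, hence the added CU->getDebugInfoOffset() term. As plain arithmetic:

```cpp
#include <cstdint>

// Rebase a unit-relative DIE offset to be relative to the start of
// the .debug_info section, as the accelerator table expects.
uint32_t sectionRelativeOffset(uint32_t DieOffsetInUnit,
                               uint32_t UnitOffsetInSection) {
  return UnitOffsetInSection + DieOffsetInUnit;
}
```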
EmitHeader(Asm); @@ -220,7 +224,7 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { EmitOffsets(Asm, SecBegin); // Emit the hash data. - EmitData(Asm, D); + EmitData(Asm, D, StrSym); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index a3cc95f75b52..4a6085b46fc5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ -#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H -#include "DIE.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" @@ -62,7 +62,7 @@ namespace llvm { class AsmPrinter; -class DwarfFile; +class DwarfDebug; class DwarfAccelTable { @@ -140,7 +140,7 @@ public: private: struct TableHeaderData { uint32_t die_offset_base; - SmallVector<Atom, 1> Atoms; + SmallVector<Atom, 3> Atoms; TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} @@ -223,7 +223,7 @@ private: void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfFile *D); + void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym); // Allocator for HashData and HashDataContents. BumpPtrAllocator Allocator; @@ -248,7 +248,7 @@ public: void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfFile *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 74215aa695dd..0dc52dae8f39 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -51,7 +51,7 @@ void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) Asm->OutStreamer.EmitCFISections(false, true); - if (!Asm->MAI->isExceptionHandlingDwarf()) + if (!Asm->MAI->usesItaniumLSDAForExceptions()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 000000000000..b4dba9c59236 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,836 @@ +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +namespace llvm { + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, 
dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), + Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + insertDIE(Node, &getUnitDie()); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + + // Don't use the address pool in non-fission or in the skeleton unit itself. + // FIXME: Once GDB supports this, it's probably worthwhile using the address + // pool from the skeleton - maybe even in non-fission (possibly fewer + // relocations by sharing them in the pool, but we have other ideas about how + // to reduce the number of relocations as well/instead). + if (!DD->useSplitDwarf() || !Skeleton) + return addLocalLabelAddress(Die, Attribute, Label); + + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + unsigned idx = DD->getAddressPool().getIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + Die.addValue(Attribute, dwarf::DW_FORM_addr, + Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label) + : new (DIEValueAllocator) DIEInteger(0)); +} + +unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + return Asm->OutStreamer.EmitDwarfFileDirective( + 0, DirName, FileName, + Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); +} + +// Return const expression if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return nullptr; + + // First operand points to a global struct. + Value *Ptr = CE->getOperand(0); + if (!isa<GlobalValue>(Ptr) || + !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) + return nullptr; + + // Second operand is zero. + const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return nullptr; + + // Third operand is offset. + if (!isa<ConstantInt>(CE->getOperand(2))) + return nullptr; + + return CE; +} + +/// getOrCreateGlobalVariableDIE - get or create global variable DIE. +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. + if (DIE *Die = getDIE(GV)) + return Die; + + assert(GV.isGlobalVariable()); + + DIScope GVContext = GV.getContext(); + DIType GTy = DD->resolve(GV.getType()); + + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + + // Add to map. 
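// Editor's aside on the "construct the context first" comment above:
// building a context DIE can recursively create the DIE for the entity
// itself, so the DwarfUnit creation helpers order context construction
// before the final create-and-add step. A sketch of the shape used
// elsewhere (e.g. for type DIEs), with placeholder names:
//
//   DIE *ContextDIE = getOrCreateContextDIE(Context); // may create our DIE
//   if (DIE *Existing = getDIE(Entity))               // so re-check after
//     return Existing;
//   return &createAndAddDIE(Tag, *ContextDIE, Entity);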
+ DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); + DIScope DeclContext; + + if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl.getContext()); + assert(SDMDecl.isStaticMember() && "Expected static member decl"); + assert(GV.isDefinition()); + // We need the declaration DIE that is in the static member's class. + DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); + addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + } else { + DeclContext = GV.getContext(); + // Add name and type. + addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(*VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) + addFlag(*VariableDIE, dwarf::DW_AT_external); + + // Add line number info. + addSourceLine(*VariableDIE, GV); + } + + if (!GV.isDefinition()) + addFlag(*VariableDIE, dwarf::DW_AT_declaration); + + // Add location. + bool addToAccelTable = false; + bool isGlobalVariable = GV.getGlobal() != nullptr; + if (isGlobalVariable) { + addToAccelTable = true; + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by a custom OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + } + + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + // Add the linkage name. + StringRef LinkageName = GV.getLinkageName(); + if (!LinkageName.empty()) + // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: + // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and + // TAG_variable. + addString(*VariableDIE, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); + } else if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + addConstantValue(*VariableDIE, CI, GTy); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + addToAccelTable = true; + // GV is a merged global. 
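// Editor's illustration of the location expression assembled below for a
// merged global (symbol and offset are hypothetical). Given
//   i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 2)
// the resulting DWARF location description is:
//
//   DW_OP_addr   _MergedGlobals  // base address of the merged blob
//   DW_OP_constu 2               // byte offset from getIndexedOffset()
//   DW_OP_plus                   // address of the original global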
+    DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+    Value *Ptr = CE->getOperand(0);
+    MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
+    DD->addArangeLabel(SymbolCU(this, Sym));
+    addOpAddress(*Loc, Sym);
+    addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
+    addUInt(*Loc, dwarf::DW_FORM_udata,
+            Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
+    addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+  }
+
+  if (addToAccelTable) {
+    DD->addAccelName(GV.getName(), *VariableDIE);
+
+    // If the linkage name is different than the name, go ahead and output
+    // that as well into the name table.
+    if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+      DD->addAccelName(GV.getLinkageName(), *VariableDIE);
+  }
+
+  addGlobalName(GV.getName(), *VariableDIE, DeclContext);
+  return VariableDIE;
+}
+
+void DwarfCompileUnit::addRange(RangeSpan Range) {
+  bool SameAsPrevCU = this == DD->getPrevCU();
+  DD->setPrevCU(this);
+  // If we have no current ranges, just add the range and return. Otherwise,
+  // compare the current section and CU against the previous section and CU
+  // we emitted into (the ones the last subprogram was contained within). If
+  // these are the same then extend our current range, otherwise add this as
+  // a new range.
+  if (CURanges.empty() || !SameAsPrevCU ||
+      (&CURanges.back().getEnd()->getSection() !=
+       &Range.getEnd()->getSection())) {
+    CURanges.push_back(Range);
+    return;
+  }
+
+  CURanges.back().setEnd(Range.getEnd());
+}
+
+void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
+                                       const MCSymbol *Label,
+                                       const MCSymbol *Sec) {
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    addLabel(Die, Attribute,
+             DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+                                        : dwarf::DW_FORM_data4,
+             Label);
+  else
+    addSectionDelta(Die, Attribute, Label, Sec);
+}
+
+void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+  // Define start line table label for each Compile Unit.
+  MCSymbol *LineTableStartSym =
+      Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
+
+  stmtListIndex = UnitDie.getValues().size();
+
+  // DW_AT_stmt_list is an offset of line number information for this
+  // compile unit in debug_line section. For split dwarf this is
+  // left in the skeleton CU and so not included.
+  // The line table entries are not always emitted in assembly, so it
+  // is not okay to use line_table_start here.
+  addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+                  DwarfLineSectionSym);
+}
+
+void DwarfCompileUnit::applyStmtList(DIE &D) {
+  D.addValue(dwarf::DW_AT_stmt_list,
+             UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
+             UnitDie.getValues()[stmtListIndex]);
+}
+
+void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
+                                       const MCSymbol *End) {
+  assert(Begin && "Begin label should not be null!");
+  assert(End && "End label should not be null!");
+  assert(Begin->isDefined() && "Invalid starting label");
+  assert(End->isDefined() && "Invalid end label");
+
+  addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+  if (DD->getDwarfVersion() < 4)
+    addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+  else
+    addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
+
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
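// Editor's note on attachLowHighPC (defined above): before DWARF 4,
// DW_AT_high_pc had to be an address; DWARF 4 allows a constant offset from
// DW_AT_low_pc, which saves a relocation. In assembly terms, roughly
// (labels hypothetical):
//
//   .quad .Lfunc_begin0                // DW_AT_low_pc, any version
//   .quad .Lfunc_end0                  // DW_AT_high_pc, DWARF < 4
//   .long .Lfunc_end0-.Lfunc_begin0    // DW_AT_high_pc, DWARF >= 4 (delta)
//
// which is why the code branches on DD->getDwarfVersion().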
+DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { + DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + + attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( + *DD->getCurrentFunction())) + addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); + + // Only include DW_AT_frame_base in full debug info + if (!includeMinimalInlineScopes()) { + const TargetRegisterInfo *RI = + Asm->TM.getSubtargetImpl()->getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + if (RI->isPhysicalRegister(Location.getReg())) + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + DD->addSubprogramNames(SP, *SPDie); + + return *SPDie; +} + +// Construct a DIE for this scope. +void DwarfCompileUnit::constructScopeDIE( + LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren) { + if (!Scope || !Scope->getScopeNode()) + return; + + DIScope DS(Scope->getScopeNode()); + + assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + "Only handle inlined subprograms here, use " + "constructSubprogramScopeDIE for non-inlined " + "subprograms"); + + SmallVector<std::unique_ptr<DIE>, 8> Children; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. + std::unique_ptr<DIE> ScopeDIE; + if (Scope->getParent() && DS.isSubprogram()) { + ScopeDIE = constructInlinedScopeDIE(Scope); + if (!ScopeDIE) + return; + // We create children when the scope DIE is not null. + createScopeChildrenDIE(Scope, Children); + } else { + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + unsigned ChildScopeCount; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + createScopeChildrenDIE(Scope, Children, &ChildScopeCount); + + // Skip imported directives in gmlt-like data. + if (!includeMinimalInlineScopes()) { + // There is no need to emit empty lexical block DIE. + for (const auto &E : DD->findImportedEntitiesForScope(DS)) + Children.push_back( + constructImportedEntityDIE(DIImportedEntity(E.second))); + } + + // If there are only other scopes as children, put them directly in the + // parent instead, as this scope would serve no purpose. + if (Children.size() == ChildScopeCount) { + FinalChildren.insert(FinalChildren.end(), + std::make_move_iterator(Children.begin()), + std::make_move_iterator(Children.end())); + return; + } + ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); + } + + // Add children + for (auto &I : Children) + ScopeDIE->addChild(std::move(I)); + + FinalChildren.push_back(std::move(ScopeDIE)); +} + +void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Value); +} + +void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, + SmallVector<RangeSpan, 2> Range) { + // Emit offset in .debug_range as a relocatable label. 
emitDIE will handle + // emitting it appropriately. + auto *RangeSectionSym = DD->getRangeSectionSym(); + + RangeSpanList List( + Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), + std::move(Range)); + + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + + // Add the range list to the set of ranges to be emitted. + (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, SmallVector<RangeSpan, 2> Ranges) { + if (Ranges.size() == 1) { + const auto &single = Ranges.front(); + attachLowHighPC(Die, single.getStart(), single.getEnd()); + } else + addScopeRangeList(Die, std::move(Ranges)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) { + SmallVector<RangeSpan, 2> List; + List.reserve(Ranges.size()); + for (const InsnRange &R : Ranges) + List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), + DD->getLabelAfterInsn(R.second))); + attachRangesOrLowHighPC(Die, std::move(List)); +} + +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. +std::unique_ptr<DIE> +DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { + assert(Scope->getScopeNode()); + DIScope DS(Scope->getScopeNode()); + DISubprogram InlinedSP = getDISubprogram(DS); + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); + + auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine); + addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + // Add the call site information to the DIE. + DILocation DL(Scope->getInlinedAt()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. + DD->addSubprogramNames(InlinedSP, *ScopeDIE); + + return ScopeDIE; +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +std::unique_ptr<DIE> +DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) { + if (DD->isLexicalScopeDIENull(Scope)) + return nullptr; + + auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + return ScopeDIE; +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, + bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); + DV.setDIE(*D); + return D; +} + +std::unique_ptr<DIE> +DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { + // Define variable debug information entry. 
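// Editor's overview of the location sources tried below, in order (the
// DBG_VALUE shape is illustrative):
//
//   1. A .debug_loc list:  DV.getDotDebugLocOffset() != ~0U
//      -> DW_AT_location emitted as a loclistptr (see addLocationList).
//   2. A DBG_VALUE insn:   e.g. DBG_VALUE %RDI, 0, !var, !expr
//      operand 0 is a register/immediate, operand 1 an offset; an
//      immediate operand 1 marks a register-indirect (memory) value.
//   3. A frame index:      DV.getFrameIndex() != ~0, lowered through
//      TargetFrameLowering::getFrameIndexReference() to reg+offset.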
+  auto VariableDie = make_unique<DIE>(DV.getTag());
+
+  if (Abstract) {
+    applyVariableAttributes(DV, *VariableDie);
+    return VariableDie;
+  }
+
+  // Add variable address.
+
+  unsigned Offset = DV.getDotDebugLocOffset();
+  if (Offset != ~0U) {
+    addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+    return VariableDie;
+  }
+
+  // Check if variable is described by a DBG_VALUE instruction.
+  if (const MachineInstr *DVInsn = DV.getMInsn()) {
+    assert(DVInsn->getNumOperands() == 4);
+    if (DVInsn->getOperand(0).isReg()) {
+      const MachineOperand RegOp = DVInsn->getOperand(0);
+      // If the second operand is an immediate, this is an indirect value.
+      if (DVInsn->getOperand(1).isImm()) {
+        MachineLocation Location(RegOp.getReg(),
+                                 DVInsn->getOperand(1).getImm());
+        addVariableAddress(DV, *VariableDie, Location);
+      } else if (RegOp.getReg())
+        addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
+    } else if (DVInsn->getOperand(0).isImm())
+      addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
+    else if (DVInsn->getOperand(0).isFPImm())
+      addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
+    else if (DVInsn->getOperand(0).isCImm())
+      addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
+                       DV.getType());
+
+    return VariableDie;
+  }
+
+  // .. else use frame index.
+  int FI = DV.getFrameIndex();
+  if (FI != ~0) {
+    unsigned FrameReg = 0;
+    const TargetFrameLowering *TFI =
+        Asm->TM.getSubtargetImpl()->getFrameLowering();
+    int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+    MachineLocation Location(FrameReg, Offset);
+    addVariableAddress(DV, *VariableDie, Location);
+  }
+
+  return VariableDie;
+}
+
+std::unique_ptr<DIE> DwarfCompileUnit::constructVariableDIE(
+    DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) {
+  auto Var = constructVariableDIE(DV, Scope.isAbstractScope());
+  if (DV.isObjectPointer())
+    ObjectPointer = Var.get();
+  return Var;
+}
+
+DIE *DwarfCompileUnit::createScopeChildrenDIE(
+    LexicalScope *Scope, SmallVectorImpl<std::unique_ptr<DIE>> &Children,
+    unsigned *ChildScopeCount) {
+  DIE *ObjectPointer = nullptr;
+
+  for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope))
+    Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+  unsigned ChildCountWithoutScopes = Children.size();
+
+  for (LexicalScope *LS : Scope->getChildren())
+    constructScopeDIE(LS, Children);
+
+  if (ChildScopeCount)
+    *ChildScopeCount = Children.size() - ChildCountWithoutScopes;
+
+  return ObjectPointer;
+}
+
+void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
+  assert(Scope && Scope->getScopeNode());
+  assert(!Scope->getInlinedAt());
+  assert(!Scope->isAbstractScope());
+  DISubprogram Sub(Scope->getScopeNode());
+
+  assert(Sub.isSubprogram());
+
+  DD->getProcessedSPNodes().insert(Sub);
+
+  DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+
+  // If this is a variadic function, add an unspecified parameter.
+  DITypeArray FnArgs = Sub.getType().getTypeArray();
+
+  // Collect lexical scope children first.
+  // ObjectPointer might be a (non-argument) local variable if it's a
+  // block's synthetic this pointer.
+  if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+    addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+  // If we have a single element of null, it is a function that returns void.
+  // If we have more than one element and the last one is null, it is a
+  // variadic function.
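// Editor's worked example (hypothetical signatures): for
//   void f();                 the subroutine type array is { null }
//   int  printf(char *, ...); the array is { int, char *, null }
// so "more than one element with a trailing null", tested below, singles
// out variadic functions rather than functions merely returning void.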
+ if (FnArgs.getNumElements() > 1 && + !FnArgs.getElement(FnArgs.getNumElements() - 1) && + !includeMinimalInlineScopes()) + ScopeDIE.addChild(make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); +} + +DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, + DIE &ScopeDIE) { + // We create children when the scope DIE is not null. + SmallVector<std::unique_ptr<DIE>, 8> Children; + DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + + // Add children + for (auto &I : Children) + ScopeDIE.addChild(std::move(I)); + + return ObjectPointer; +} + +void +DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { + DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; + + DISubprogram SP(Scope->getScopeNode()); + + DIE *ContextDIE; + + if (includeMinimalInlineScopes()) + ContextDIE = &getUnitDie(); + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the DIDescriptor is not associated with the + // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. + else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + ContextDIE = &getUnitDie(); + getOrCreateSubprogramDIE(SPDecl); + } else + ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + + // Passing null as the associated DIDescriptor because the abstract definition + // shouldn't be found by lookup. + AbsDef = + &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor()); + applySubprogramAttributesToDefinition(SP, *AbsDef); + + if (!includeMinimalInlineScopes()) + addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef)) + addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); +} + +std::unique_ptr<DIE> +DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { + assert(Module.Verify() && + "Use one of the MDNode * overloads to handle invalid metadata"); + std::unique_ptr<DIE> IMDie = make_unique<DIE>((dwarf::Tag)Module.getTag()); + insertDIE(Module, IMDie.get()); + DIE *EntityDie; + DIDescriptor Entity = resolve(Module.getEntity()); + if (Entity.isNameSpace()) + EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); + else if (Entity.isSubprogram()) + EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); + else if (Entity.isType()) + EntityDie = getOrCreateTypeDIE(DIType(Entity)); + else if (Entity.isGlobalVariable()) + EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + else + EntityDie = getDIE(Entity); + assert(EntityDie); + addSourceLine(*IMDie, Module.getLineNumber(), + Module.getContext().getFilename(), + Module.getContext().getDirectory()); + addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); + StringRef Name = Module.getName(); + if (!Name.empty()) + addString(*IMDie, dwarf::DW_AT_name, Name); + + return IMDie; +} + +void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { + DIE *D = getDIE(SP); + if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + if (!D && !includeMinimalInlineScopes()) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. 
(except in -gmlt where we want
+      // to omit these entirely)
+      D = getOrCreateSubprogramDIE(SP);
+    if (D)
+      // And attach the attributes
+      applySubprogramAttributesToDefinition(SP, *D);
+  }
+}
+void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
+  assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram");
+  assert(SP.isDefinition() &&
+         "CU's subprogram list contains a subprogram declaration");
+  DIArray Variables = SP.getVariables();
+  if (Variables.getNumElements() == 0)
+    return;
+
+  DIE *SPDIE = DU->getAbstractSPDies().lookup(SP);
+  if (!SPDIE)
+    SPDIE = getDIE(SP);
+  assert(SPDIE);
+  for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+    DIVariable DV(Variables.getElement(vi));
+    assert(DV.isVariable());
+    DbgVariable NewVar(DV, DIExpression(nullptr), DD);
+    auto VariableDie = constructVariableDIE(NewVar);
+    applyVariableAttributes(NewVar, *VariableDie);
+    SPDIE->addChild(std::move(VariableDie));
+  }
+}
+
+void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+  // Don't bother labeling the .dwo unit, as its offset isn't used.
+  if (!Skeleton)
+    Asm->OutStreamer.EmitLabel(LabelBegin);
+
+  DwarfUnit::emitHeader(ASectionSym);
+}
+
+/// addGlobalName - Add a new global name to the compile unit.
+void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+                                     DIScope Context) {
+  if (includeMinimalInlineScopes())
+    return;
+  std::string FullName = getParentContextString(Context) + Name.str();
+  GlobalNames[FullName] = &Die;
+}
+
+/// Add a new global type to the unit.
+void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die,
+                                     DIScope Context) {
+  if (includeMinimalInlineScopes())
+    return;
+  std::string FullName = getParentContextString(Context) + Ty.getName().str();
+  GlobalTypes[FullName] = &Die;
+}
+
+/// addVariableAddress - Add DW_AT_location attribute for a
+/// DbgVariable based on provided MachineLocation.
+void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
+                                          MachineLocation Location) {
+  if (DV.variableHasComplexAddress())
+    addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+  else if (DV.isBlockByrefVariable())
+    addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+  else
+    addAddress(Die, dwarf::DW_AT_location, Location,
+               DV.getVariable().isIndirect());
+}
+
+/// Add an address attribute to a die based on the location provided.
+void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
+                                  const MachineLocation &Location,
+                                  bool Indirect) {
+  DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+
+  bool validReg;
+  if (Location.isReg() && !Indirect)
+    validReg = addRegisterOpPiece(*Loc, Location.getReg());
+  else
+    validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+
+  if (!validReg)
+    return;
+
+  if (!Location.isReg() && Indirect)
+    addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, Loc);
+}
+
+/// Start with the address based on the location provided, and generate the
+/// DWARF information necessary to find the actual variable given the extra
+/// address information encoded in the DbgVariable, beginning from the
+/// starting location. Add the DWARF information to the die.
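// Editor's sketch of the two paths the implementation below takes through
// the new DIEDwarfExpression builder (interface as introduced by this
// patch):
//
//   // reg+offset: emit a base-register dereference first, then the user's
//   // complex expression on top of it.
//   DwarfExpr.AddMachineRegIndirect(Reg, Offset);
//   DwarfExpr.AddExpression(Expr);
//
//   // plain register: let the expression drive the lowering, adding a
//   // trailing DW_OP_deref only for indirect variables.
//   DwarfExpr.AddMachineRegExpression(Expr, Reg);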
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
+                                         dwarf::Attribute Attribute,
+                                         const MachineLocation &Location) {
+  DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+  DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+  DIExpression Expr = DV.getExpression();
+  if (Location.getOffset()) {
+    if (DwarfExpr.AddMachineRegIndirect(Location.getReg(),
+                                        Location.getOffset())) {
+      DwarfExpr.AddExpression(Expr);
+      assert(!DV.getVariable().isIndirect()
+             && "double indirection not handled");
+    }
+  } else {
+    if (DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()))
+      if (DV.getVariable().isIndirect())
+        DwarfExpr.EmitOp(dwarf::DW_OP_deref);
+  }
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, Loc);
+}
+
+/// Add a Dwarf loclistptr attribute data and value.
+void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
+                                       unsigned Index) {
+  DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
+  dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+                                                : dwarf::DW_FORM_data4;
+  Die.addValue(Attribute, Form, Value);
+}
+
+void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
+                                               DIE &VariableDie) {
+  StringRef Name = Var.getName();
+  if (!Name.empty())
+    addString(VariableDie, dwarf::DW_AT_name, Name);
+  addSourceLine(VariableDie, Var.getVariable());
+  addType(VariableDie, Var.getType());
+  if (Var.isArtificial())
+    addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
+/// Add a Dwarf expression attribute data and value.
+void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form,
+                               const MCExpr *Expr) {
+  DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
+  Die.addValue((dwarf::Attribute)0, Form, Value);
+}
+
+void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP,
+                                                             DIE &SPDie) {
+  DISubprogram SPDecl = SP.getFunctionDeclaration();
+  DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+  applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
+  addGlobalName(SP.getName(), SPDie, Context);
+}
+
+bool DwarfCompileUnit::isDwoUnit() const {
+  return DD->useSplitDwarf() && Skeleton;
+}
+
+bool DwarfCompileUnit::includeMinimalInlineScopes() const {
+  return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly ||
+         (DD->useSplitDwarf() && !Skeleton);
+}
+} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
new file mode 100644
index 000000000000..91164bc7aa2c
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -0,0 +1,250 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF compile units.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DwarfUnit.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Dwarf.h" + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; +class DwarfFile; +class MCSymbol; +class LexicalScope; + +class DwarfCompileUnit : public DwarfUnit { + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. + unsigned stmtListIndex; + + /// Skeleton unit associated with this unit. + DwarfCompileUnit *Skeleton; + + /// A label at the start of the non-dwo section related to this unit. + MCSymbol *SectionSym; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + /// GlobalNames - A map of globally visible named entities for this unit. + StringMap<const DIE *> GlobalNames; + + /// GlobalTypes - A map of globally visible types for this unit. + StringMap<const DIE *> GlobalTypes; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector<RangeSpanList, 1> CURangeLists; + + // List of ranges for a given compile unit. + SmallVector<RangeSpan, 2> CURanges; + + // The base address of this unit, if any. Used for relative references in + // ranges/locs. + const MCSymbol *BaseAddress; + + /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract); + + bool isDwoUnit() const override; + + bool includeMinimalInlineScopes() const; + +public: + DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + DwarfCompileUnit *getSkeleton() const { + return Skeleton; + } + + void initStmtList(MCSymbol *DwarfLineSectionSym); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// getOrCreateGlobalVariableDIE - get or create global variable DIE. + DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec); + + /// \brief Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. 
If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. + DIE &updateSubprogramScopeDIE(DISubprogram SP); + + void constructScopeDIE(LexicalScope *Scope, + SmallVectorImpl<std::unique_ptr<DIE>> &FinalChildren); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range); + + void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges); + + void attachRangesOrLowHighPC(DIE &D, + const SmallVectorImpl<InsnRange> &Ranges); + /// \brief This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. + std::unique_ptr<DIE> constructInlinedScopeDIE(LexicalScope *Scope); + + /// \brief Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. + std::unique_ptr<DIE> constructLexicalScopeDIE(LexicalScope *Scope); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV, + bool Abstract = false); + + std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV, + const LexicalScope &Scope, + DIE *&ObjectPointer); + + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl<std::unique_ptr<DIE>> &Children, + unsigned *ChildScopeCount = nullptr); + + /// \brief Construct a DIE for this subprogram scope. + void constructSubprogramScopeDIE(LexicalScope *Scope); + + DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); + + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + + /// \brief Construct import_module DIE. + std::unique_ptr<DIE> + constructImportedEntityDIE(const DIImportedEntity &Module); + + void finishSubprogramDefinition(DISubprogram SP); + + void collectDeadVariables(DISubprogram SP); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } + + MCSymbol *getSectionSym() const { + assert(Section); + return SectionSym; + } + + /// Pass in the SectionSym even though we could recreate it in every compile + /// unit (type units will have actually distinct symbols once they're in + /// comdat sections). + void initSection(const MCSection *Section, MCSymbol *SectionSym) { + DwarfUnit::initSection(Section); + this->SectionSym = SectionSym; + + // Don't bother labeling the .dwo unit, as its offset isn't used. + if (!Skeleton) + LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + } + + unsigned getLength() { + return sizeof(uint32_t) + // Length field + getHeaderSize() + UnitDie.getSize(); + } + + void emitHeader(const MCSymbol *ASectionSym) const override; + + MCSymbol *getLabelBegin() const { + assert(Section); + return LabelBegin; + } + + /// Add a new global name to the compile unit. + void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override; + + /// Add a new global type to the compile unit. + void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override; + + const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } + + /// Add DW_AT_location attribute for a DbgVariable based on provided + /// MachineLocation. 
+ void addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location); + /// Add an address attribute to a die based on the location provided. + void addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect = false); + + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual variable (navigating the + /// extra location information encoded in the type) based on the starting + /// location. Add the DWARF information to the die. + void addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + + /// Add a Dwarf expression attribute data and value. + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + + void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return (Skeleton ? Skeleton : this)->CURangeLists; + } + + /// getRanges - Get the list of ranges for this unit. + const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } + SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); } + + void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } + const MCSymbol *getBaseAddress() const { return BaseAddress; } +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index ac1c0ffb3cc0..a587b46c4ba8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "DwarfDebug.h" -#include "DIE.h" +#include "ByteStreamer.h" #include "DIEHash.h" +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" #include "DwarfUnit.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" @@ -48,6 +50,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "dwarfdebug" @@ -150,7 +153,7 @@ DIType DbgVariable::getType() const { if (tag == dwarf::DW_TAG_pointer_type) subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); - DIArray Elements = DICompositeType(subType).getTypeArray(); + DIArray Elements = DICompositeType(subType).getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) @@ -166,10 +169,11 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr), - GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), + InfoHolder(A, *this, 
"info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, "skel_string", DIEValueAllocator), + SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -189,8 +193,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. - bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -309,26 +311,6 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { return false; } -// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc -// and DW_AT_high_pc attributes. If there are global variables in this -// scope then create and insert DIEs for these variables. -DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, - DISubprogram SP) { - DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP); - - attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); - - // Add name to the name table, we do this here because we're guaranteed - // to have concrete versions of our DW_TAG_subprogram nodes. - addSubprogramNames(SP, *SPDie); - - return *SPDie; -} - /// Check whether we should create a DIE for the given Scope, return true /// if we don't create a DIE (the corresponding DIE is null). bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { @@ -345,271 +327,30 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { // We don't create a DIE if we have a single Range and the end label // is null. - SmallVectorImpl<InsnRange>::const_iterator RI = Ranges.begin(); - MCSymbol *End = getLabelAfterInsn(RI->second); - return !End; -} - -static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D, - dwarf::Attribute A, const MCSymbol *L, - const MCSymbol *Sec) { - if (Asm.MAI->doesDwarfUseRelocationsAcrossSections()) - U.addSectionLabel(D, A, L); - else - U.addSectionDelta(D, A, L, Sec); -} - -void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, - const SmallVectorImpl<InsnRange> &Range) { - // Emit offset in .debug_range as a relocatable label. emitDIE will handle - // emitting it appropriately. - MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++); - - // Under fission, ranges are specified by constant offsets relative to the - // CU's DW_AT_GNU_ranges_base. - if (useSplitDwarf()) - TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym, - DwarfDebugRangeSectionSym); - else - addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym, - DwarfDebugRangeSectionSym); - - RangeSpanList List(RangeSym); - for (const InsnRange &R : Range) { - RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second)); - List.addRange(std::move(Span)); - } - - // Add the range list to the set of ranges to be emitted. 
- TheCU.addRangeList(std::move(List)); -} - -void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die, - const SmallVectorImpl<InsnRange> &Ranges) { - assert(!Ranges.empty()); - if (Ranges.size() == 1) - attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first), - getLabelAfterInsn(Ranges.front().second)); - else - addScopeRangeList(TheCU, Die, Ranges); -} - -// Construct new DW_TAG_lexical_block for this scope and attach -// DW_AT_low_pc/DW_AT_high_pc labels. -std::unique_ptr<DIE> -DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - if (isLexicalScopeDIENull(Scope)) - return nullptr; - - auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block); - if (Scope->isAbstractScope()) - return ScopeDIE; - - attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); - - return ScopeDIE; + return !getLabelAfterInsn(Ranges.front().second); } -// This scope represents inlined body of a function. Construct DIE to -// represent this concrete inlined copy of the function. -std::unique_ptr<DIE> -DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - assert(Scope->getScopeNode()); - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS); - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram - // was inlined from another compile unit. - DIE *OriginDIE = AbstractSPDies[InlinedSP]; - assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); - - auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine); - TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); - - attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); - - InlinedSubprogramDIEs.insert(OriginDIE); - - // Add the call site information to the DIE. - DILocation DL(Scope->getInlinedAt()); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); - - // Add name to the name table, we do this here because we're guaranteed - // to have concrete versions of our DW_TAG_inlined_subprogram nodes. - addSubprogramNames(InlinedSP, *ScopeDIE); - - return ScopeDIE; -} - -static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU, - DbgVariable &DV, - const LexicalScope &Scope, - DIE *&ObjectPointer) { - auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope()); - if (DV.isObjectPointer()) - ObjectPointer = Var.get(); - return Var; -} - -DIE *DwarfDebug::createScopeChildrenDIE( - DwarfCompileUnit &TheCU, LexicalScope *Scope, - SmallVectorImpl<std::unique_ptr<DIE>> &Children) { - DIE *ObjectPointer = nullptr; - - // Collect arguments for current function. - if (LScopes.isCurrentFunctionScope(Scope)) { - for (DbgVariable *ArgDV : CurrentFnArguments) - if (ArgDV) - Children.push_back( - constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer)); - - // If this is a variadic function, add an unspecified parameter. - DISubprogram SP(Scope->getScopeNode()); - DIArray FnArgs = SP.getType().getTypeArray(); - if (FnArgs.getElement(FnArgs.getNumElements() - 1) - .isUnspecifiedParameter()) { - Children.push_back( - make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters)); - } - } - - // Collect lexical scope children first. 
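// Editor's note, looking ahead to the forBothCUs helper introduced just
// below: with split DWARF some work must apply to both the full unit and
// its skeleton, and the helper replaces ad-hoc mirroring through
// getSkeleton(). Typical use from later in this patch:
//
//   forBothCUs(*CU, [&](DwarfCompileUnit &U) {
//     U.constructAbstractSubprogramScopeDIE(Scope);
//   });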
- for (DbgVariable *DV : ScopeVariables.lookup(Scope)) - Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer)); - - for (LexicalScope *LS : Scope->getChildren()) - if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS)) - Children.push_back(std::move(Nested)); - return ObjectPointer; +template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) { + F(CU); + if (auto *SkelCU = CU.getSkeleton()) + F(*SkelCU); } -void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU, - LexicalScope *Scope, DIE &ScopeDIE) { - // We create children when the scope DIE is not null. - SmallVector<std::unique_ptr<DIE>, 8> Children; - if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children)) - TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); - - // Add children - for (auto &I : Children) - ScopeDIE.addChild(std::move(I)); -} - -void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { +void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); - DISubprogram SP(Scope->getScopeNode()); + const MDNode *SP = Scope->getScopeNode(); ProcessedSPNodes.insert(SP); - DIE *&AbsDef = AbstractSPDies[SP]; - if (AbsDef) - return; - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DwarfCompileUnit &SPCU = *SPMap[SP]; - DIE *ContextDIE; - - // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with - // the important distinction that the DIDescriptor is not associated with the - // DIE (since the DIDescriptor will be associated with the concrete DIE, if - // any). It could be refactored to some common utility function. - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { - ContextDIE = &SPCU.getUnitDie(); - SPCU.getOrCreateSubprogramDIE(SPDecl); - } else - ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext())); - - // Passing null as the associated DIDescriptor because the abstract definition - // shouldn't be found by lookup. - AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, - DIDescriptor()); - SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef); - - SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(SPCU, Scope, *AbsDef); -} - -DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - assert(Scope && Scope->getScopeNode()); - assert(!Scope->getInlinedAt()); - assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); - - ProcessedSPNodes.insert(Sub); - - DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub); - - createAndAddScopeChildren(TheCU, Scope, ScopeDIE); - - return ScopeDIE; -} - -// Construct a DIE for this scope. -std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return nullptr; - - DIScope DS(Scope->getScopeNode()); - - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && - "Only handle inlined subprograms here, use " - "constructSubprogramScopeDIE for non-inlined " - "subprograms"); - - SmallVector<std::unique_ptr<DIE>, 8> Children; - - // We try to create the scope DIE first, then the children DIEs. 
This will - // avoid creating un-used children then removing them later when we find out - // the scope DIE is null. - std::unique_ptr<DIE> ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { - ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); - if (!ScopeDIE) - return nullptr; - // We create children when the scope DIE is not null. - createScopeChildrenDIE(TheCU, Scope, Children); - } else { - // Early exit when we know the scope DIE is going to be null. - if (isLexicalScopeDIENull(Scope)) - return nullptr; - - // We create children here when we know the scope DIE is not going to be - // null and the children will be added to the scope DIE. - createScopeChildrenDIE(TheCU, Scope, Children); - - // There is no need to emit empty lexical block DIE. - std::pair<ImportedEntityMap::const_iterator, - ImportedEntityMap::const_iterator> Range = - std::equal_range(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), - std::pair<const MDNode *, const MDNode *>(DS, nullptr), - less_first()); - if (Children.empty() && Range.first == Range.second) - return nullptr; - ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - assert(ScopeDIE && "Scope DIE should not be null."); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; - ++i) - constructImportedEntityDIE(TheCU, i->second, *ScopeDIE); - } - - // Add children - for (auto &I : Children) - ScopeDIE->addChild(std::move(I)); - - return ScopeDIE; + auto &CU = SPMap[SP]; + forBothCUs(*CU, [&](DwarfCompileUnit &CU) { + CU.constructAbstractSubprogramScopeDIE(Scope); + }); } void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { @@ -630,6 +371,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; DIE &Die = NewCU.getUnitDie(); InfoHolder.addUnit(std::move(OwnedUnit)); + if (useSplitDwarf()) + NewCU.setSkeleton(constructSkeletonCU(NewCU)); // LTO with assembly output shares a single line table amongst multiple CUs. 
// To avoid the compilation directory being ambiguous, let the line table @@ -666,14 +409,10 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - if (!FirstCU) - FirstCU = &NewCU; - - if (useSplitDwarf()) { + if (useSplitDwarf()) NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), DwarfInfoDWOSectionSym); - NewCU.setSkeleton(constructSkeletonCU(NewCU)); - } else + else NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), DwarfInfoSectionSym); @@ -682,44 +421,12 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { return NewCU; } -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N) { +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const MDNode *N) { DIImportedEntity Module(N); assert(Module.Verify()); if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) - constructImportedEntityDIE(TheCU, Module, *D); -} - -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N, DIE &Context) { - DIImportedEntity Module(N); - assert(Module.Verify()); - return constructImportedEntityDIE(TheCU, Module, Context); -} - -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity &Module, - DIE &Context) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module); - DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity)); - else - EntityDie = TheCU.getDIE(Entity); - TheCU.addSourceLine(IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); - TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); - if (!Name.empty()) - TheCU.addString(IMDie, dwarf::DW_AT_name, Name); + D->addChild(TheCU.constructImportedEntityDIE(Module)); } // Emit all Dwarf sections that should come prior to the content. Create @@ -731,8 +438,8 @@ void DwarfDebug::beginModule() { const Module *M = MMI->getModule(); - // If module has named metadata anchors then use them, otherwise scan the - // module using debug info finder to collect debug info. 
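// Editor's note: debug-info emission is driven entirely by the llvm.dbg.cu
// named metadata; modules without it are skipped. The lookup pattern used
// below, as a minimal sketch:
//
//   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
//   if (!CU_Nodes)
//     return; // no debug info to emit for this module
//   for (MDNode *N : CU_Nodes->operands())
//     constructDwarfCompileUnit(DICompileUnit(N)); // roughly one unit each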
+ FunctionDIs = makeSubprogramMap(*M); + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -755,13 +462,18 @@ void DwarfDebug::beginModule() { ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); + CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - CU.getOrCreateTypeDIE(EnumTypes.getElement(i)); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { + DIType Ty(EnumTypes.getElement(i)); + // The enum types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + DIType UniqueTy(resolve(Ty.getRef())); + CU.getOrCreateTypeDIE(UniqueTy); + } DIArray RetainedTypes = CUNode.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { DIType Ty(RetainedTypes.getElement(i)); @@ -773,7 +485,7 @@ void DwarfDebug::beginModule() { // Emit imported_modules last so that the relevant context is already // available. for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); + constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); } // Tell MMI that we have debug info. @@ -786,9 +498,7 @@ void DwarfDebug::beginModule() { void DwarfDebug::finishVariableDefinitions() { for (const auto &Var : ConcreteVariables) { DIE *VariableDie = Var->getDIE(); - // FIXME: There shouldn't be any variables without DIEs. - if (!VariableDie) - continue; + assert(VariableDie); // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. @@ -804,36 +514,10 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - const Module *M = MMI->getModule(); - - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); - // Construct subprogram DIE and add variables DIEs. - DwarfCompileUnit *SPCU = - static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); - // Perhaps the subprogram is in another CU (such as due to comdat - // folding, etc), in which case ignore it here. - if (SPMap[SP] != SPCU) - continue; - DIE *D = SPCU->getDIE(SP); - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { - if (D) - // If this subprogram has an abstract definition, reference that - SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); - } else { - if (!D) - // Lazily construct the subprogram if we didn't see either concrete or - // inlined versions during codegen. 
- D = SPCU->getOrCreateSubprogramDIE(SP); - // And attach the attributes - SPCU->applySubprogramAttributesToDefinition(SP, *D); - } - } - } + for (const auto &P : SPMap) + forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(DISubprogram(P.first)); + }); } @@ -853,26 +537,7 @@ void DwarfDebug::collectDeadVariables() { DISubprogram SP(Subprograms.getElement(i)); if (ProcessedSPNodes.count(SP) != 0) continue; - assert(SP.isSubprogram() && - "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && - "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) - continue; - - DIE *SPDIE = AbstractSPDies.lookup(SP); - if (!SPDIE) - SPDIE = SPCU->getDIE(SP); - assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, this); - auto VariableDie = SPCU->constructVariableDIE(NewVar); - SPCU->applyVariableAttributes(NewVar, *VariableDie); - SPDIE->addChild(std::move(VariableDie)); - } + SPCU->collectDeadVariables(SP); } } } @@ -888,66 +553,52 @@ void DwarfDebug::finalizeModuleInfo() { // Handle anything that needs to be done on a per-unit basis after // all other generation. - for (const auto &TheU : getUnits()) { + for (const auto &P : CUMap) { + auto &TheCU = *P.second; // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. - TheU->constructContainingTypeDIEs(); + TheCU.constructContainingTypeDIEs(); // Add CU specific attributes if we need to add any. - if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) { - // If we're splitting the dwarf out now that we've got the entire - // CU then add the dwo id to it. - DwarfCompileUnit *SkCU = - static_cast<DwarfCompileUnit *>(TheU->getSkeleton()); - if (useSplitDwarf()) { - // Emit a unique identifier for this CU. - uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie()); - TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - - // We don't keep track of which addresses are used in which CU so this - // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) - addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(), - dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym, - DwarfAddrSectionSym); - if (!TheU->getRangeLists().empty()) - addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(), - dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym); - } + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + auto *SkCU = TheCU.getSkeleton(); + if (useSplitDwarf()) { + // Emit a unique identifier for this CU. + uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. 
+ if (!AddrPool.isEmpty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
+ DwarfAddrSectionSym, DwarfAddrSectionSym);
+ if (!SkCU->getRangeLists().empty())
+ SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
+ DwarfDebugRangeSectionSym,
+ DwarfDebugRangeSectionSym);
+ }
- // If we have code split among multiple sections or non-contiguous
- // ranges of code then emit a DW_AT_ranges attribute on the unit that will
- // remain in the .o file, otherwise add a DW_AT_low_pc.
- // FIXME: We should use ranges allow reordering of code ala
- // .subsections_via_symbols in mach-o. This would mean turning on
- // ranges for all subprogram DIEs for mach-o.
- DwarfCompileUnit &U =
- SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
- unsigned NumRanges = TheU->getRanges().size();
- if (NumRanges) {
- if (NumRanges > 1) {
- addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
- Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
- DwarfDebugRangeSectionSym);
-
- // A DW_AT_low_pc attribute may also be specified in combination with
- // DW_AT_ranges to specify the default base address for use in
- // location lists (see Section 2.6.2) and range lists (see Section
- // 2.17.3).
- U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- 0);
- } else {
- RangeSpan &Range = TheU->getRanges().back();
- U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
- Range.getStart());
- U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
- Range.getStart());
- }
- }
+ // If we have code split among multiple sections or non-contiguous
+ // ranges of code then emit a DW_AT_ranges attribute on the unit that will
+ // remain in the .o file, otherwise add a DW_AT_low_pc.
+ // FIXME: We should use ranges to allow reordering of code ala
+ // .subsections_via_symbols in mach-o. This would mean turning on
+ // ranges for all subprogram DIEs for mach-o.
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ if (unsigned NumRanges = TheCU.getRanges().size()) {
+ if (NumRanges > 1)
+ // A DW_AT_low_pc attribute may also be specified in combination with
+ // DW_AT_ranges to specify the default base address for use in
+ // location lists (see Section 2.6.2) and range lists (see Section
+ // 2.17.3).
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+ else
+ TheCU.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
+ }
}

@@ -1009,7 +660,10 @@ void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);

- if (!FirstCU)
+ // If we aren't actually generating debug info (check beginModule -
+ // conditionalized on !DisableDebugInfoPrinting and the presence of the
+ // llvm.dbg.cu metadata node)
+ if (!DwarfInfoSectionSym)
return;

// End any existing sections.
@@ -1063,9 +717,6 @@
// clean up.
SPMap.clear();
AbstractVariables.clear();
-
- // Reset these for the next Module if we have one.
- FirstCU = nullptr;
}

// Find abstract variable, if any, associated with Var.
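// [Editorial aside] A standalone sketch of the single-vs-multiple-range
// decision in finalizeModuleInfo above, with hypothetical plain types in
// place of DwarfCompileUnit and RangeSpan. One contiguous range: record its
// start as the CU base address and later emit DW_AT_low_pc/DW_AT_high_pc.
// Several ranges: emit DW_AT_ranges plus a zero DW_AT_low_pc so range and
// location lists still have a well-defined base.
#include <cstdint>
#include <vector>

struct AddrRange { uint64_t Start, End; };

enum class CUAddrForm { LowHighPC, RangesWithZeroBase };

static CUAddrForm chooseAddrForm(const std::vector<AddrRange> &Ranges,
                                 uint64_t &Base) {
  if (Ranges.size() == 1) {
    Base = Ranges.front().Start; // default base for loc/range lists
    return CUAddrForm::LowHighPC;
  }
  Base = 0; // a zero DW_AT_low_pc accompanies DW_AT_ranges
  return CUAddrForm::RangesWithZeroBase;
}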
@@ -1091,8 +742,8 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { void DwarfDebug::createAbstractVariable(const DIVariable &Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, this); - addScopeVariable(Scope, AbsDbgVariable.get()); + auto AbsDbgVariable = make_unique<DbgVariable>(Var, DIExpression(), this); + InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } @@ -1116,55 +767,35 @@ DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, createAbstractVariable(Cleansed, Scope); } -// If Var is a current function argument then add it to CurrentFnArguments list. -bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) { - if (!LScopes.isCurrentFunctionScope(Scope)) - return false; - DIVariable DV = Var->getVariable(); - if (DV.getTag() != dwarf::DW_TAG_arg_variable) - return false; - unsigned ArgNo = DV.getArgNumber(); - if (ArgNo == 0) - return false; - - size_t Size = CurrentFnArguments.size(); - if (Size == 0) - CurrentFnArguments.resize(CurFn->getFunction()->arg_size()); - // llvm::Function argument size is not good indicator of how many - // arguments does the function have at source level. - if (ArgNo > Size) - CurrentFnArguments.resize(ArgNo * 2); - CurrentFnArguments[ArgNo - 1] = Var; - return true; -} - // Collect variable information from side table maintained by MMI. void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSet<const MDNode *, 16> &Processed) { + SmallPtrSetImpl<const MDNode *> &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; Processed.insert(VI.Var); - DIVariable DV(VI.Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; + DIVariable DV(VI.Var); + DIExpression Expr(VI.Expr); ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this)); + ConcreteVariables.push_back(make_unique<DbgVariable>(DV, Expr, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); RegVar->setFrameIndex(VI.Slot); - addScopeVariable(Scope, RegVar); + InfoHolder.addScopeVariable(Scope, RegVar); } } // Get .debug_loc entry for the instruction range starting at MI. 
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+ const MDNode *Expr = MI->getDebugExpression();
const MDNode *Var = MI->getDebugVariable();
- assert(MI->getNumOperands() == 3);
+ assert(MI->getNumOperands() == 4);
if (MI->getOperand(0).isReg()) {
MachineLocation MLoc;
// If the second operand is an immediate, this is a
@@ -1173,24 +804,138 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- return DebugLocEntry::Value(Var, MLoc);
+ return DebugLocEntry::Value(Var, Expr, MLoc);
}
if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm());
if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm());
+ return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm());

- llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}

-// Find variables for each lexical scope.
+/// Determine whether two variable pieces overlap.
+static bool piecesOverlap(DIExpression P1, DIExpression P2) {
+ if (!P1.isVariablePiece() || !P2.isVariablePiece())
+ return true;
+ unsigned l1 = P1.getPieceOffset();
+ unsigned l2 = P2.getPieceOffset();
+ unsigned r1 = l1 + P1.getPieceSize();
+ unsigned r2 = l2 + P2.getPieceSize();
+ // True where [l1,r1[ and [l2,r2[ overlap.
+ return (l1 < r2) && (l2 < r1);
+}
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. If the ranges of several independent
+/// pieces of the same variable overlap partially, split them up and
+/// combine the ranges. The resulting DebugLocEntries will have
+/// strictly monotonically increasing begin addresses and will never
+/// overlap.
+//
+// Input:
+//
+// Ranges History [var, loc, piece ofs size]
+// 0 | [x, (reg0, piece 0, 32)]
+// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// 2 | | ...
+// 3 | [clobber reg0]
+// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x.
+//
+// Output:
+//
+// [0-1] [x, (reg0, piece 0, 32)]
+// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
+// [3-4] [x, (reg1, piece 32, 32)]
+// [4- ] [x, (mem, piece 0, 64)]
void
-DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
+DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::InstrRanges &Ranges) {
+ SmallVector<DebugLocEntry::Value, 4> OpenRanges;
+
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const MachineInstr *Begin = I->first;
+ const MachineInstr *End = I->second;
+ assert(Begin->isDebugValue() && "Invalid History entry");
+
+ // Check if a variable is inaccessible in this range.
+ if (Begin->getNumOperands() > 1 &&
+ Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
+ OpenRanges.clear();
+ continue;
+ }
+
+ // If this piece overlaps with any open ranges, truncate them.
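// [Editorial aside] The half-open interval test in piecesOverlap above,
// restated as a self-contained sketch (hypothetical struct; offsets and
// sizes in the same abstract units): two half-open ranges [l1,r1[ and
// [l2,r2[ intersect exactly when l1 < r2 and l2 < r1, so pieces that merely
// abut do not count as overlapping.
#include <cassert>

struct PieceSketch { unsigned Offset, Size; };

static bool overlapSketch(PieceSketch A, PieceSketch B) {
  unsigned R1 = A.Offset + A.Size, R2 = B.Offset + B.Size;
  return A.Offset < R2 && B.Offset < R1; // half-open intervals intersect
}

static void overlapSketchTests() {
  assert(overlapSketch({0, 32}, {16, 32}));  // partial overlap
  assert(!overlapSketch({0, 32}, {32, 32})); // adjacent pieces are disjoint
}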
+ DIExpression DIExpr = Begin->getDebugExpression(); + auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](DebugLocEntry::Value R) { + return piecesOverlap(DIExpr, R.getExpression()); + }); + OpenRanges.erase(Last, OpenRanges.end()); + const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); + assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); + + const MCSymbol *EndLabel; + if (End != nullptr) + EndLabel = getLabelAfterInsn(End); + else if (std::next(I) == Ranges.end()) + EndLabel = FunctionEndSym; + else + EndLabel = getLabelBeforeInsn(std::next(I)->first); + assert(EndLabel && "Forgot label after instruction ending a range!"); + + DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); + + auto Value = getDebugLocValue(Begin); + DebugLocEntry Loc(StartLabel, EndLabel, Value); + bool couldMerge = false; + + // If this is a piece, it may belong to the current DebugLocEntry. + if (DIExpr.isVariablePiece()) { + // Add this value to the list of open ranges. + OpenRanges.push_back(Value); + + // Attempt to add the piece to the last entry. + if (!DebugLoc.empty()) + if (DebugLoc.back().MergeValues(Loc)) + couldMerge = true; + } + + if (!couldMerge) { + // Need to add a new DebugLocEntry. Add all values from still + // valid non-overlapping pieces. + if (OpenRanges.size()) + Loc.addValues(OpenRanges); + + DebugLoc.push_back(std::move(Loc)); + } + + // Attempt to coalesce the ranges of two otherwise identical + // DebugLocEntries. + auto CurEntry = DebugLoc.rbegin(); + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); + + DEBUG({ + dbgs() << CurEntry->getValues().size() << " Values:\n"; + for (auto Value : CurEntry->getValues()) { + Value.getVariable()->dump(); + Value.getExpression()->dump(); + } + dbgs() << "-----\n"; + }); + } +} + + +// Find variables for each lexical scope. +void +DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + SmallPtrSetImpl<const MDNode *> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); @@ -1205,10 +950,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { continue; LexicalScope *Scope = nullptr; - if (DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(DV.getContext()).describes(CurFn->getFunction())) - Scope = LScopes.getCurrentFunctionScope(); - else if (MDNode *IA = DV.getInlinedAt()) { + if (MDNode *IA = DV.getInlinedAt()) { DebugLoc DL = DebugLoc::getFromDILocation(IA); Scope = LScopes.findInlinedScope(DebugLoc::get( DL.getLine(), DL.getCol(), DV.getContext(), IA)); @@ -1224,7 +966,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); - addScopeVariable(Scope, RegVar); + InfoHolder.addScopeVariable(Scope, RegVar); // Check if the first DBG_VALUE is valid for the rest of the function. 
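// [Editorial aside] The MergeValues/MergeRanges steps above perform a
// standard location-list compaction. A standalone sketch (hypothetical
// EntrySketch type, not LLVM's DebugLocEntry): when a new entry carries the
// same set of values as its predecessor and their ranges abut, extend the
// predecessor instead of appending a new entry.
#include <utility>
#include <vector>

struct EntrySketch {
  int Begin, End;          // label indices standing in for MCSymbols
  std::vector<int> Values; // stand-ins for DebugLocEntry::Value
};

static void pushCoalesced(std::vector<EntrySketch> &List, EntrySketch E) {
  if (!List.empty() && List.back().End == E.Begin &&
      List.back().Values == E.Values)
    List.back().End = E.End; // extend the previous entry, do not append
  else
    List.push_back(std::move(E));
}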
if (Ranges.size() == 1 && Ranges.front().second == nullptr) @@ -1235,53 +977,26 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); DebugLocList &LocList = DotDebugLocEntries.back(); + LocList.CU = &TheCU; LocList.Label = Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); - SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List; - for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { - const MachineInstr *Begin = I->first; - const MachineInstr *End = I->second; - assert(Begin->isDebugValue() && "Invalid History entry"); - - // Check if a variable is unaccessible in this range. - if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && - !Begin->getOperand(0).getReg()) - continue; - DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin); - if (End != nullptr) - DEBUG(dbgs() << "\t" << *End); - else - DEBUG(dbgs() << "\tNULL\n"); - const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); - assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); - - const MCSymbol *EndLabel; - if (End != nullptr) - EndLabel = getLabelAfterInsn(End); - else if (std::next(I) == Ranges.end()) - EndLabel = FunctionEndSym; - else - EndLabel = getLabelBeforeInsn(std::next(I)->first); - assert(EndLabel && "Forgot label after instruction ending a range!"); - - DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU); - if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) - DebugLoc.push_back(std::move(Loc)); - } + // Build the location list for this variable. + buildLocationList(LocList.List, Ranges); } // Collect info for variables that were optimized out. - DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); + DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); assert(DV.isVariable()); - if (!Processed.insert(DV)) + if (!Processed.insert(DV).second) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this)); - addScopeVariable(Scope, ConcreteVariables.back().get()); + DIExpression NoExpr; + ConcreteVariables.push_back(make_unique<DbgVariable>(DV, NoExpr, this)); + InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } } @@ -1311,8 +1026,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; } - if (PrologEndLoc.isUnknown()) + if (DL.getLine() != + Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine()) Flags |= DWARF2_FLAG_IS_STMT; if (!DL.isUnknown()) { @@ -1402,8 +1119,12 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { for (const auto &MBB : *MF) for (const auto &MI : MBB) if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && - !MI.getDebugLoc().isUnknown()) + !MI.getDebugLoc().isUnknown()) { + // Did the target forget to set the FrameSetup flag for CFI insns? 
+ assert(!MI.isCFIInstruction() && + "First non-frame-setup instruction is a CFI instruction."); return MI.getDebugLoc(); + } return DebugLoc(); } @@ -1416,6 +1137,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; + auto DI = FunctionDIs.find(MF->getFunction()); + if (DI == FunctionDIs.end()) + return; + // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. LScopes.initialize(*MF); @@ -1431,6 +1156,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // belongs to so that we add to the correct per-cu line table in the // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + // FnScope->getScopeNode() and DI->second should represent the same function, + // though they may not be the same MDNode due to inline functions merged in + // LTO where the debug info metadata still differs (either due to distinct + // written differences - two versions of a linkonce_odr function + // written/copied into two separate files, or some sub-optimal metadata that + // isn't structurally identical (see: file path/name info from clang, which + // includes the directory of the cpp file being built, even when the file name + // is absolute (such as an <> lookup header))) DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); if (Asm->OutStreamer.hasRawTextSupport()) @@ -1445,7 +1178,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); + calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(), + DbgValues); // Request labels for the full history. for (const auto &I : DbgValues) { @@ -1455,10 +1189,24 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. - DIVariable DV(I.first); - if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DV.getContext()).describes(MF->getFunction())) + DIVariable DIVar(Ranges.front().first->getDebugVariable()); + if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + if (Ranges.front().first->getDebugExpression().isVariablePiece()) { + // Mark all non-overlapping initial pieces. + for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { + DIExpression Piece = I->first->getDebugExpression(); + if (std::all_of(Ranges.begin(), I, + [&](DbgValueHistoryMap::InstrRange Pred) { + return !piecesOverlap(Piece, Pred.first->getDebugExpression()); + })) + LabelsBeforeInsn[I->first] = FunctionBeginSym; + else + break; + } + } + } for (const auto &Range : Ranges) { requestLabelBeforeInsn(Range.first); @@ -1484,55 +1232,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } } -void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { - if (addCurrentFnArgument(Var, LS)) - return; - SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; - DIVariable DV = Var->getVariable(); - // Variables with positive arg numbers are parameters. 
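// [Editorial aside] The std::all_of scan in beginFunction above gives the
// FunctionBeginSym label to a leading run of argument pieces, stopping at
// the first piece that overlaps an earlier one. The same prefix computation
// as a standalone sketch (hypothetical types; returns the length of the
// mutually disjoint prefix):
#include <cstddef>
#include <vector>

struct ArgPiece { unsigned Offset, Size; };

static bool argPiecesDisjoint(ArgPiece A, ArgPiece B) {
  return A.Offset + A.Size <= B.Offset || B.Offset + B.Size <= A.Offset;
}

static size_t countDisjointPrefix(const std::vector<ArgPiece> &Pieces) {
  for (size_t I = 0; I != Pieces.size(); ++I)
    for (size_t J = 0; J != I; ++J)
      if (!argPiecesDisjoint(Pieces[I], Pieces[J]))
        return I; // piece I overlaps an earlier one; the prefix ends here
  return Pieces.size();
}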
- if (unsigned ArgNum = DV.getArgNumber()) { - // Keep all parameters in order at the start of the variable list to ensure - // function types are correct (no out-of-order parameters) - // - // This could be improved by only doing it for optimized builds (unoptimized - // builds have the right order to begin with), searching from the back (this - // would catch the unoptimized case quickly), or doing a binary search - // rather than linear search. - SmallVectorImpl<DbgVariable *>::iterator I = Vars.begin(); - while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); - // A local (non-parameter) variable has been found, insert immediately - // before it. - if (CurNum == 0) - break; - // A later indexed parameter has been found, insert immediately before it. - if (CurNum > ArgNum) - break; - ++I; - } - Vars.insert(I, Var); - return; - } - - Vars.push_back(Var); -} - // Gather and emit post-function debug information. void DwarfDebug::endFunction(const MachineFunction *MF) { - // Every beginFunction(MF) call should be followed by an endFunction(MF) call, - // though the beginFunction may not be called at all. - // We should handle both cases. - if (!CurFn) - CurFn = MF; - else - assert(CurFn == MF); - assert(CurFn != nullptr); + assert(CurFn == MF && + "endFunction should be called with the same function as beginFunction"); - if (!MMI->hasDebugInfo() || LScopes.empty()) { + if (!MMI->hasDebugInfo() || LScopes.empty() || + !FunctionDIs.count(MF->getFunction())) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. - PrevSection = nullptr; PrevCU = nullptr; CurFn = nullptr; return; @@ -1546,12 +1255,35 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Set DwarfDwarfCompileUnitID in MCContext to default value. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DISubprogram SP(FnScope->getScopeNode()); + DwarfCompileUnit &TheCU = *SPMap.lookup(SP); + SmallPtrSet<const MDNode *, 16> ProcessedVars; - collectVariableInfo(ProcessedVars); + collectVariableInfo(TheCU, SP, ProcessedVars); - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode()); + // Add the range of this function to the list of ranges for the CU. + TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + + // Under -gmlt, skip building the subprogram if there are no inlined + // subroutines inside it. + if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + LScopes.getAbstractScopesList().empty() && !IsDarwin) { + assert(InfoHolder.getScopeVariables().empty()); + assert(DbgValues.empty()); + // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed + // by a -gmlt CU. Add a test and remove this assertion. + assert(AbstractVariables.empty()); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); + PrevLabel = nullptr; + CurFn = nullptr; + return; + } +#ifndef NDEBUG + size_t NumAbstractScopes = LScopes.getAbstractScopesList().size(); +#endif // Construct abstract scopes. 
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { DISubprogram SP(AScope->getScopeNode()); @@ -1561,29 +1293,25 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV)) + if (!ProcessedVars.insert(DV).second) continue; ensureAbstractVariableIsCreated(DV, DV.getContext()); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractVariableIsCreated inserted abstract scopes"); } - constructAbstractSubprogramScopeDIE(TheCU, AScope); + constructAbstractSubprogramScopeDIE(AScope); } - DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope); - if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn)) - TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - - // Add the range of this function to the list of ranges for the CU. - RangeSpan Span(FunctionBeginSym, FunctionEndSym); - TheCU.addRange(std::move(Span)); - PrevSection = Asm->getCurrentSection(); - PrevCU = &TheCU; + TheCU.constructSubprogramScopeDIE(FnScope); + if (auto *SkelCU = TheCU.getSkeleton()) + if (!LScopes.getAbstractScopesList().empty()) + SkelCU->constructSubprogramScopeDIE(FnScope); // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) - ScopeVariables.clear(); - CurrentFnArguments.clear(); + InfoHolder.getScopeVariables().clear(); DbgValues.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); @@ -1603,8 +1331,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, assert(Scope.isScope()); Fn = Scope.getFilename(); Dir = Scope.getDirectory(); - if (Scope.isLexicalBlock()) - Discriminator = DILexicalBlock(S).getDiscriminator(); + if (Scope.isLexicalBlockFile()) + Discriminator = DILexicalBlockFile(S).getDiscriminator(); unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) @@ -1625,9 +1353,12 @@ void DwarfDebug::emitSectionLabels() { // Dwarf sections base addresses. DwarfInfoSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) + if (useSplitDwarf()) { DwarfInfoDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); + DwarfTypesDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); + } DwarfAbbrevSectionSym = emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); if (useSplitDwarf()) @@ -1711,7 +1442,7 @@ void DwarfDebug::emitDIE(DIE &Die) { void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Holder.emitUnits(this, DwarfAbbrevSectionSym); + Holder.emitUnits(DwarfAbbrevSectionSym); } // Emit the abbreviation section. @@ -1745,54 +1476,41 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -// Emit visible names into a hashed accelerator table section. 
-void DwarfDebug::emitAccelNames() { - AccelNames.FinalizeTable(Asm, "Names"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamesSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); +void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, + StringRef TableName, StringRef SymName) { + Accel.FinalizeTable(Asm, TableName); + Asm->OutStreamer.SwitchSection(Section); + auto *SectionBegin = Asm->GetTempSymbol(SymName); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AccelNames.Emit(Asm, SectionBegin, &InfoHolder); + Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); +} + +// Emit visible names into a hashed accelerator table section. +void DwarfDebug::emitAccelNames() { + emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), + "Names", "names_begin"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - AccelObjC.FinalizeTable(Asm, "ObjC"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelObjCSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. - AccelObjC.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), + "ObjC", "objc_begin"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - AccelNamespace.FinalizeTable(Asm, "namespac"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamespaceSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. - AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelNamespace, + Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), + "namespac", "namespac_begin"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { - - AccelTypes.FinalizeTable(Asm, "types"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelTypesSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. - AccelTypes.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), + "types", "types_begin"); } // Public name handling. @@ -1859,12 +1577,13 @@ void DwarfDebug::emitDebugPubNames(bool GnuStyle) { GnuStyle ? 
Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); - emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames); + emitDebugPubSection(GnuStyle, PSec, "Names", + &DwarfCompileUnit::getGlobalNames); } void DwarfDebug::emitDebugPubSection( bool GnuStyle, const MCSection *PSec, StringRef Name, - const StringMap<const DIE *> &(DwarfUnit::*Accessor)() const) { + const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; @@ -1873,7 +1592,7 @@ void DwarfDebug::emitDebugPubSection( if (Globals.empty()) continue; - if (auto Skeleton = static_cast<DwarfCompileUnit *>(TheU->getSkeleton())) + if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; unsigned ID = TheU->getUniqueID(); @@ -1895,7 +1614,7 @@ void DwarfDebug::emitDebugPubSection( Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4); + Asm->EmitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { @@ -1928,7 +1647,8 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection(); - emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes); + emitDebugPubSection(GnuStyle, PSec, "Types", + &DwarfCompileUnit::getGlobalTypes); } // Emit visible names into a debug str section. @@ -1937,61 +1657,86 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } +/// Emits an optimal (=sorted) sequence of DW_OP_pieces. +void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, + const DITypeIdentifierMap &Map, + ArrayRef<DebugLocEntry::Value> Values) { + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isVariablePiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + const unsigned SizeOfByte = 8; + DIExpression Expr = Piece.getExpression(); + unsigned PieceOffset = Expr.getPieceOffset(); + unsigned PieceSize = Expr.getPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*SizeOfByte); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + +#ifndef NDEBUG + DIVariable Var = Piece.getVariable(); + assert(!Var.isIndirect() && "indirect address for piece"); + unsigned VarSize = Var.getSizeInBits(Map); + assert(PieceSize+PieceOffset <= VarSize/SizeOfByte + && "piece is larger than or outside of variable"); + assert(PieceSize*SizeOfByte != VarSize + && "piece covers entire variable"); +#endif + + emitDebugLocValue(Streamer, Piece, PieceOffset*SizeOfByte); + } +} + + void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { - assert(Entry.getValues().size() == 1 && - "multi-value entries are not supported yet."); const DebugLocEntry::Value Value = Entry.getValues()[0]; - DIVariable DV(Value.getVariable()); + if (Value.isVariablePiece()) + // Emit all pieces that belong to the same variable and range. 
+ return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); + + assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(Streamer, Value); +} + +void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { + DIVariable DV = Value.getVariable(); + DebugLocDwarfExpression DwarfExpr(*Asm, Streamer); + + // Regular entry. if (Value.isInt()) { DIBasicType BTy(resolve(DV.getType())); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || - BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { - Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts"); - Streamer.EmitSLEB128(Value.getInt()); - } else { - Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); - Streamer.EmitULEB128(Value.getInt()); - } + BTy.getEncoding() == dwarf::DW_ATE_signed_char)) + DwarfExpr.AddSignedConstant(Value.getInt()); + else + DwarfExpr.AddUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - if (!DV.hasComplexAddress()) + DIExpression Expr = Value.getExpression(); + if (!Expr || (Expr.getNumElements() == 0)) // Regular entry. Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); else { // Complex address entry. - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Loc.getOffset()) { - i = 2; - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); - Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitSLEB128(DV.getAddrElement(1)); - } else { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect()); - i = 2; - } - } else { - Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); - } - - // Emit remaining complex address elements. - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - if (!Loc.isReg()) - Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); - } else - llvm_unreachable("unknown Opcode found in complex address"); - } + if (Loc.getOffset()) { + DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); + DwarfExpr.AddExpression(Expr, PieceOffsetInBits); + } else + DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), + PieceOffsetInBits); + if (DV.isIndirect()) + DwarfExpr.EmitOp(dwarf::DW_OP_deref); } } // else ... ignore constant fp. There is not any good way to @@ -2020,14 +1765,12 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getDataLayout().getPointerSize(); for (const auto &DebugLoc : DotDebugLocEntries) { Asm->OutStreamer.EmitLabel(DebugLoc.Label); + const DwarfCompileUnit *CU = DebugLoc.CU; for (const auto &Entry : DebugLoc.List) { // Set up the range. This range is relative to the entry point of the // compile unit. This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. - const DwarfCompileUnit *CU = Entry.getCU(); - if (CU->getRanges().size() == 1) { - // Grab the begin symbol from the first range as our base. 
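// [Editorial aside] emitLocPieces above walks the sorted pieces of one
// variable and, where the next piece starts past the current offset, emits
// an empty DW_OP_piece for the hole, since DWARF describes gaps in a
// composite location with pieces that have no location. A standalone
// sketch of that walk (hypothetical types; offsets and sizes in bytes):
#include <cstdio>
#include <vector>

struct LocPiece { unsigned OffsetBytes, SizeBytes; };

static void walkPieces(const std::vector<LocPiece> &Sorted) {
  unsigned Offset = 0;
  for (const LocPiece &P : Sorted) {
    if (Offset < P.OffsetBytes) // hole before this piece: empty piece
      std::printf("DW_OP_piece %u (gap)\n", P.OffsetBytes - Offset);
    std::printf("<location> DW_OP_piece %u\n", P.SizeBytes);
    Offset = P.OffsetBytes + P.SizeBytes;
  }
}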
- const MCSymbol *Base = CU->getRanges()[0].getStart(); + if (auto *Base = CU->getBaseAddress()) { Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); } else { @@ -2157,6 +1900,10 @@ void DwarfDebug::emitDebugARanges() { for (DwarfCompileUnit *CU : CUs) { std::vector<ArangeSpan> &List = Spans[CU]; + // Describe the skeleton CU's offset and length, not the dwo file's. + if (auto *Skel = CU->getSkeleton()) + CU = Skel; + // Emit size of content not including length itself. unsigned ContentSize = sizeof(int16_t) + // DWARF ARange version number @@ -2179,7 +1926,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym()); + Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer.AddComment("Segment Size (in bytes)"); @@ -2223,6 +1970,9 @@ void DwarfDebug::emitDebugRanges() { for (const auto &I : CUMap) { DwarfCompileUnit *TheCU = I.second; + if (auto *Skel = TheCU->getSkeleton()) + TheCU = Skel; + // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. @@ -2233,9 +1983,7 @@ void DwarfDebug::emitDebugRanges() { const MCSymbol *End = Range.getEnd(); assert(Begin && "Range without a begin symbol?"); assert(End && "Range without an end symbol?"); - if (TheCU->getRanges().size() == 1) { - // Grab the begin symbol from the first range as our base. - const MCSymbol *Base = TheCU->getRanges()[0].getStart(); + if (auto *Base = TheCU->getBaseAddress()) { Asm->EmitLabelDifference(Begin, Base, Size); Asm->EmitLabelDifference(End, Base, Size); } else { @@ -2248,23 +1996,6 @@ void DwarfDebug::emitDebugRanges() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitIntValue(0, Size); } - - // Now emit a range for the CU itself. - if (TheCU->getRanges().size() > 1) { - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID())); - for (const RangeSpan &Range : TheCU->getRanges()) { - const MCSymbol *Begin = Range.getStart(); - const MCSymbol *End = Range.getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - Asm->OutStreamer.EmitSymbolValue(Begin, Size); - Asm->OutStreamer.EmitSymbolValue(End, Size); - } - // And terminate the list with two 0 values. - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); - } } } @@ -2272,11 +2003,11 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU) { - NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, + U.getCUNode().getSplitDebugFilename()); if (!CompilationDir.empty()) - NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); addGnuPubAttributes(*NewU, Die); @@ -2301,31 +2032,13 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { return NewCU; } -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name, -// DW_AT_addr_base. 
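// [Editorial aside] emitDebugLoc and emitDebugRanges above choose between
// two encodings per range: deltas from the CU base address when
// finalizeModuleInfo recorded one (EmitLabelDifference), otherwise absolute
// symbol values. A standalone sketch with raw addresses in place of
// MCSymbols:
#include <cstdint>
#include <cstdio>

static void emitRangeEntry(const uint64_t *Base, uint64_t Begin,
                           uint64_t End) {
  if (Base) // base-relative: two offsets from the CU's base address
    std::printf("offset %llu, offset %llu\n",
                (unsigned long long)(Begin - *Base),
                (unsigned long long)(End - *Base));
  else // absolute: two relocated addresses
    std::printf("addr 0x%llx, addr 0x%llx\n", (unsigned long long)Begin,
                (unsigned long long)End);
}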
-DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) { - DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>( - *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]); - - auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this, - &SkeletonHolder); - DwarfTypeUnit &NewTU = *OwnedUnit; - NewTU.setTypeSignature(TU.getTypeSignature()); - NewTU.setType(nullptr); - NewTU.initSection( - Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature())); - - initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit)); - return NewTU; -} - // Emit the .debug_info.dwo section for separated dwarf. This contains the // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); // Don't pass an abbrev symbol, using a constant zero instead so as not to // emit relocations into the dwo file. - InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr); + InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); } // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the @@ -2349,9 +2062,8 @@ void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); const MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); - const MCSymbol *StrSym = DwarfStrSectionSym; InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), - OffSec, StrSym); + OffSec); } MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { @@ -2391,9 +2103,9 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = - make_unique<DwarfTypeUnit>(InfoHolder.getUnits().size(), CU, Asm, this, - &InfoHolder, getDwoLineTable(CU)); + auto OwnedUnit = make_unique<DwarfTypeUnit>( + InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm, + this, &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); TU = &NewTU; @@ -2406,15 +2118,13 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, uint64_t Signature = makeTypeSignature(Identifier); NewTU.setTypeSignature(Signature); - if (!useSplitDwarf()) + if (useSplitDwarf()) + NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); + else { CU.applyStmtList(UnitDie); - - // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools - // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it. - NewTU.initSection( - useSplitDwarf() - ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) - : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + NewTU.initSection( + Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + } NewTU.setType(NewTU.createTypeDIE(CTy)); @@ -2442,29 +2152,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // If the type wasn't dependent on fission addresses, finish adding the type // and all its dependent types. 
- for (auto &TU : TypeUnitsToAdd) { - if (useSplitDwarf()) - TU.first->setSkeleton(constructSkeletonTU(*TU.first)); + for (auto &TU : TypeUnitsToAdd) InfoHolder.addUnit(std::move(TU.first)); - } } CU.addDIETypeSignature(RefDie, NewTU); } -void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, - MCSymbol *Begin, MCSymbol *End) { - assert(Begin && "Begin label should not be null!"); - assert(End && "End label should not be null!"); - assert(Begin->isDefined() && "Invalid starting label"); - assert(End->isDefined() && "Invalid end label"); - - Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); - if (DwarfVersion < 4) - Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End); - else - Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); -} - // Accelerator table mutators - add each name along with its companion // DIE to the proper table while ensuring that the name that we're going // to reference is in the string table. We do this since the names we diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index f2aa80845a05..a1a94264dddd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -11,29 +11,28 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ -#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H -#include "DwarfFile.h" #include "AsmPrinterHandler.h" -#include "DIE.h" #include "DbgValueHistoryCalculator.h" #include "DebugLocEntry.h" #include "DebugLocList.h" #include "DwarfAccelTable.h" +#include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" - #include <memory> namespace llvm { @@ -70,6 +69,7 @@ public: /// \brief This class is used to track local variable information. class DbgVariable { DIVariable Var; // Variable Descriptor. + DIExpression Expr; // Complex address location expression. DIE *TheDIE; // Variable DIE. unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. @@ -78,18 +78,22 @@ class DbgVariable { public: /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DwarfDebug *DD) - : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), - FrameIndex(~0), DD(DD) {} + DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD) + : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U), + MInsn(nullptr), FrameIndex(~0), DD(DD) { + assert(Var.Verify() && Expr.Verify()); + } /// Construct a DbgVariable from a DEBUG_VALUE. /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) - : Var(DbgValue->getDebugVariable()), TheDIE(nullptr), - DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {} + : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()), + TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), + FrameIndex(~0), DD(DD) {} // Accessors. 
DIVariable getVariable() const { return Var; } + DIExpression getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } @@ -124,14 +128,14 @@ public: bool variableHasComplexAddress() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); + return Expr.getNumElements() > 0; } bool isBlockByrefVariable() const; unsigned getNumAddrElements() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); + return Expr.getNumElements(); } - uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } + uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); } DIType getType() const; private: @@ -159,26 +163,15 @@ class DwarfDebug : public AsmPrinterHandler { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Handle to the compile unit used for the inline extension handling, - // this is just so that the DIEValue allocator has a place to store - // the particular elements. - // FIXME: Store these off of DwarfDebug instead? - DwarfCompileUnit *FirstCU; - // Maps MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> CUMap; // Maps subprogram MDNode with its corresponding DwarfCompileUnit. - DenseMap<const MDNode *, DwarfCompileUnit *> SPMap; + MapVector<const MDNode *, DwarfCompileUnit *> SPMap; // Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; - /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can - /// be shared across CUs, that is why we keep the map here instead - /// of in DwarfCompileUnit. - DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; - // List of all labels used in aranges generation. std::vector<SymbolCU> ArangeLabels; @@ -189,19 +182,8 @@ class DwarfDebug : public AsmPrinterHandler { typedef DenseMap<const MCSection *, SmallVector<SymbolCU, 8> > SectionMapType; SectionMapType SectionMap; - // List of arguments for current function. - SmallVector<DbgVariable *, 8> CurrentFnArguments; - LexicalScopes LScopes; - // Collection of abstract subprogram DIEs. - DenseMap<const MDNode *, DIE *> AbstractSPDies; - - // Collection of dbg variables of a scope. - typedef DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > - ScopeVariablesMap; - ScopeVariablesMap ScopeVariables; - // Collection of abstract variables. DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; @@ -210,10 +192,6 @@ class DwarfDebug : public AsmPrinterHandler { // can refer to them in spite of insertions into this list. SmallVector<DebugLocList, 4> DotDebugLocEntries; - // Collection of subprogram DIEs that are marked (at the end of the module) - // as DW_AT_inline. - SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; - // This is a collection of subprogram MDNodes that are processed to // create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; @@ -243,10 +221,6 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the current machine instruction we're processing. const MachineInstr *CurMI; - // If nonnull, stores the section that the previous function was allocated to - // emitting. - const MCSection *PrevSection; - // If nonnull, stores the CU in which the previous subprogram was contained. 
const DwarfCompileUnit *PrevCU; @@ -258,6 +232,7 @@ class DwarfDebug : public AsmPrinterHandler { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; + MCSymbol *DwarfTypesDWOSectionSym; MCSymbol *DwarfStrDWOSectionSym; MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; @@ -322,6 +297,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; + bool IsDarwin; AddressPool AddrPool; @@ -330,9 +306,9 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; - MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); + DenseMap<const Function *, DISubprogram> FunctionDIs; - void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return InfoHolder.getUnits(); @@ -348,45 +324,8 @@ class DwarfDebug : public AsmPrinterHandler { void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, const MDNode *Scope); - /// \brief Find DIE for the given subprogram and attach appropriate - /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global - /// variables in this scope then create and insert DIEs for these - /// variables. - DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP); - - /// \brief A helper function to check whether the DIE for a given Scope is - /// going to be null. - bool isLexicalScopeDIENull(LexicalScope *Scope); - - /// \brief A helper function to construct a RangeSpanList for a given - /// lexical scope. - void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, - const SmallVectorImpl<InsnRange> &Range); - - /// \brief Construct new DW_TAG_lexical_block for this scope and - /// attach DW_AT_low_pc/DW_AT_high_pc labels. - std::unique_ptr<DIE> constructLexicalScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - - /// \brief This scope represents inlined body of a function. Construct - /// DIE to represent this concrete inlined copy of the function. - std::unique_ptr<DIE> constructInlinedScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - - /// \brief Construct a DIE for this scope. - std::unique_ptr<DIE> constructScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope, - DIE &ScopeDIE); /// \brief Construct a DIE for this abstract scope. - void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - /// \brief Construct a DIE for this subprogram scope. - DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope, - SmallVectorImpl<std::unique_ptr<DIE>> &Children); + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -422,6 +361,10 @@ class DwarfDebug : public AsmPrinterHandler { /// the line matrix. void emitEndOfLineMatrix(unsigned SectionEnd); + /// \brief Emit a specified accelerator table. 
+ void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, + StringRef TableName, StringRef SymName); + /// \brief Emit visible names into a hashed accelerator table section. void emitAccelNames(); @@ -447,10 +390,9 @@ class DwarfDebug : public AsmPrinterHandler { /// index. void emitDebugPubTypes(bool GnuStyle = false); - void - emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name, - const StringMap<const DIE *> &(DwarfUnit::*Accessor)() - const); + void emitDebugPubSection( + bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); /// \brief Emit visible names into a debug str section. void emitDebugStr(); @@ -505,15 +447,8 @@ class DwarfDebug : public AsmPrinterHandler { DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct imported_module or imported_declaration DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N); - - /// \brief Construct import_module DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N, - DIE &Context); - - /// \brief Construct import_module DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity &Module, DIE &Context); + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const MDNode *N); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -525,38 +460,29 @@ class DwarfDebug : public AsmPrinterHandler { /// ending of a scope. void identifyScopeMarkers(); - /// \brief If Var is an current function argument that add it in - /// CurrentFnArguments list. - bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope); - /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(SmallPtrSet<const MDNode *, 16> &ProcessedVars); + void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + SmallPtrSetImpl<const MDNode *> &ProcessedVars); + + /// \brief Build the location list for all DBG_VALUEs in the + /// function that describe the same variable. + void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::InstrRanges &Ranges); /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSet<const MDNode *, 16> &P); + void collectVariableInfoFromMMITable(SmallPtrSetImpl<const MDNode *> &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Return Label preceding the instruction. - MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Return Label immediately following the instruction. - MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - - void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D, - const SmallVectorImpl<InsnRange> &Ranges); - void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin, - MCSymbol *End); - public: //===--------------------------------------------------------------------===// // Main entry points. 
@@ -565,13 +491,6 @@ public: ~DwarfDebug() override; - void insertDIE(const MDNode *TypeMD, DIE *Die) { - MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); - } - DIE *getDIE(const MDNode *TypeMD) { - return MDTypeNodeToDieMap.lookup(TypeMD); - } - /// \brief Emit all Dwarf sections that should come prior to the /// content. void beginModule(); @@ -624,11 +543,15 @@ public: /// Returns the section symbol for the .debug_loc section. MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - /// Returns the previous section that was emitted into. - const MCSection *getPrevSection() const { return PrevSection; } + /// Returns the section symbol for the .debug_str section. + MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } + + /// Returns the section symbol for the .debug_ranges section. + MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } /// Returns the entries for the .debug_loc section. const SmallVectorImpl<DebugLocList> & @@ -639,6 +562,14 @@ public: /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + /// \brief emit a single value for the debug loc section. + void emitDebugLocValue(ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits = 0); + /// Emits an optimal (=sorted) sequence of DW_OP_pieces. + void emitLocPieces(ByteStreamer &Streamer, + const DITypeIdentifierMap &Map, + ArrayRef<DebugLocEntry::Value> Values); /// Emit the location for a debug loc entry, including the size header. void emitDebugLocEntryLocation(const DebugLocEntry &Entry); @@ -672,6 +603,40 @@ public: void addAccelNamespace(StringRef Name, const DIE &Die); void addAccelType(StringRef Name, const DIE &Die, char Flags); + + const MachineFunction *getCurrentFunction() const { return CurFn; } + const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } + const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } + + iterator_range<ImportedEntityMap::const_iterator> + findImportedEntitiesForScope(const MDNode *Scope) const { + return make_range(std::equal_range( + ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), + std::pair<const MDNode *, const MDNode *>(Scope, nullptr), + less_first())); + } + + /// \brief A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// \brief Return Label preceding the instruction. + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// \brief Return Label immediately following the instruction. + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + + // FIXME: Consider rolling ranges up into DwarfDebug since we use a single + // range_base anyway, so there's no need to keep them as separate per-CU range + // lists. (though one day we might end up with a range.dwo section, in which + // case it'd go to DwarfFile) + unsigned getNextRangeNumber() { return GlobalRangeCount++; } + + // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. 
+ + SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { + return ProcessedSPNodes; + } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 0440fce506d0..e8867c0a323c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H -#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -81,39 +81,6 @@ public: /// endFunction - Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; - -class Win64Exception : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if personality - /// info should be emitted. - bool shouldEmitPersonality; - - /// shouldEmitLSDA - Per-function flag to indicate if the LSDA - /// should be emitted. - bool shouldEmitLSDA; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - -public: - //===--------------------------------------------------------------------===// - // Main entry points. - // - Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); - - /// endModule - Emit all exception information that should come after the - /// content. - void endModule() override; - - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. - void beginFunction(const MachineFunction *MF) override; - - /// endFunction - Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; -}; - } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp new file mode 100644 index 000000000000..8e85effa965e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -0,0 +1,260 @@ +//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfExpression.h" +#include "DwarfDebug.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +const TargetRegisterInfo *DwarfExpression::getTRI() const { + return AP.TM.getSubtargetImpl()->getRegisterInfo(); +} + +unsigned DwarfExpression::getDwarfVersion() const { + return AP.getDwarfDebug()->getDwarfVersion(); +} + +void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + } else { + EmitOp(dwarf::DW_OP_regx, Comment); + EmitUnsigned(DwarfReg); + } +} + +void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_breg0 + DwarfReg); + } else { + EmitOp(dwarf::DW_OP_bregx); + EmitUnsigned(DwarfReg); + } + EmitSigned(Offset); + if (Deref) + EmitOp(dwarf::DW_OP_deref); +} + +void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { + assert(SizeInBits > 0 && "piece has size zero"); + const unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { + EmitOp(dwarf::DW_OP_bit_piece); + EmitUnsigned(SizeInBits); + EmitUnsigned(OffsetInBits); + } else { + EmitOp(dwarf::DW_OP_piece); + unsigned ByteSize = SizeInBits / SizeOfByte; + EmitUnsigned(ByteSize); + } +} + +void DwarfExpression::AddShr(unsigned ShiftBy) { + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(ShiftBy); + EmitOp(dwarf::DW_OP_shr); +} + +bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { + int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + if (isFrameRegister(MachineReg)) { + // If variable offset is based in frame register then use fbreg. + EmitOp(dwarf::DW_OP_fbreg); + EmitSigned(Offset); + } else { + AddRegIndirect(DwarfReg, Offset); + } + return true; +} + +bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) { + const TargetRegisterInfo *TRI = getTRI(); + int Reg = TRI->getDwarfRegNum(MachineReg, false); + + // If this is a valid register number, emit it. + if (Reg >= 0) { + AddReg(Reg); + if (PieceSizeInBits) + AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + return true; + } + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) { + Reg = TRI->getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI->getSubRegIdxSize(Idx); + unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); + AddReg(Reg, "super-register"); + if (PieceOffsetInBits == RegOffset) { + AddOpPiece(Size, RegOffset); + } else { + // If this is part of a variable in a sub-register at a + // non-zero offset, we need to manually shift the value into + // place, since the DW_OP_piece describes the part of the + // variable, not the position of the subregister. 
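+        // Illustration with made-up numbers: for a 32-bit variable kept in
+        // the upper half of a 64-bit super-register (RegOffset == 32,
+        // Size == 32, PieceOffsetInBits == 0), the code below emits
+        //   DW_OP_regx <super>, DW_OP_constu 32, DW_OP_shr, DW_OP_piece 4
+        // i.e. the value is shifted down first and only then described as
+        // a 4-byte piece of the variable.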
+        if (RegOffset)
+          AddShr(RegOffset);
+        AddOpPiece(Size, PieceOffsetInBits);
+      }
+      return true;
+    }
+  }
+
+  // Otherwise, attempt to find a covering set of sub-register numbers.
+  // For example, Q0 on ARM is a composition of D0+D1.
+  //
+  // Keep track of the current position so we can emit the more
+  // efficient DW_OP_piece.
+  unsigned CurPos = PieceOffsetInBits;
+  // The size of the register in bits, assuming 8 bits per byte.
+  unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+  // Keep track of the bits in the register we already emitted, so we
+  // can avoid emitting redundant aliasing subregs.
+  SmallBitVector Coverage(RegSize, false);
+  for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
+    unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
+    unsigned Size = TRI->getSubRegIdxSize(Idx);
+    unsigned Offset = TRI->getSubRegIdxOffset(Idx);
+    Reg = TRI->getDwarfRegNum(*SR, false);
+
+    // Intersection between the bits we already emitted and the bits
+    // covered by this subregister.
+    SmallBitVector Intersection(RegSize, false);
+    Intersection.set(Offset, Offset + Size);
+    Intersection ^= Coverage;
+
+    // If this sub-register has a DWARF number and we haven't covered
+    // its range, emit a DWARF piece for it.
+    if (Reg >= 0 && Intersection.any()) {
+      AddReg(Reg, "sub-register");
+      AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+      CurPos = Offset + Size;
+
+      // Mark it as emitted.
+      Coverage.set(Offset, Offset + Size);
+    }
+  }
+
+  return CurPos > PieceOffsetInBits;
+}
+
+void DwarfExpression::AddSignedConstant(int Value) {
+  EmitOp(dwarf::DW_OP_consts);
+  EmitSigned(Value);
+  // The proper way to describe a constant value is
+  // DW_OP_constu <const>, DW_OP_stack_value.
+  // Unfortunately, DW_OP_stack_value was not available until DWARF-4,
+  // so we will continue to generate DW_OP_constu <const> for DWARF-2
+  // and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
+  // actually describes a value at a constant address, not a constant value.
+  // However, in the past there was no better way to describe a constant
+  // value, so the producers and consumers started to rely on heuristics
+  // to disambiguate the value vs. location status of the expression.
+  // See PR21176 for more details.
+  if (getDwarfVersion() >= 4)
+    EmitOp(dwarf::DW_OP_stack_value);
+}
+
+void DwarfExpression::AddUnsignedConstant(unsigned Value) {
+  EmitOp(dwarf::DW_OP_constu);
+  EmitUnsigned(Value);
+  // cf. comment in DwarfExpression::AddSignedConstant().
+  if (getDwarfVersion() >= 4)
+    EmitOp(dwarf::DW_OP_stack_value);
+}
+
+static unsigned getOffsetOrZero(unsigned OffsetInBits,
+                                unsigned PieceOffsetInBits) {
+  if (OffsetInBits == PieceOffsetInBits)
+    return 0;
+  assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
+  return OffsetInBits;
+}
+
+bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
+                                              unsigned MachineReg,
+                                              unsigned PieceOffsetInBits) {
+  unsigned N = Expr.getNumElements();
+  unsigned I = 0;
+  bool ValidReg = false;
+  // Pattern-match combinations for which more efficient representations exist
+  // first.
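+  // For illustration, the three shapes recognized below (expression
+  // elements on the left, emitted location description on the right,
+  // <reg> being the DWARF number of MachineReg):
+  //   [DW_OP_piece, offset, size]       --> piece of <reg>
+  //   [DW_OP_plus, offset, DW_OP_deref] --> DW_OP_breg<reg> offset
+  //   [DW_OP_deref]                     --> DW_OP_breg<reg> 0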
+  if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_piece) {
+    unsigned SizeOfByte = 8;
+    unsigned OffsetInBits = Expr.getElement(1) * SizeOfByte;
+    unsigned SizeInBits = Expr.getElement(2) * SizeOfByte;
+    ValidReg =
+        AddMachineRegPiece(MachineReg, SizeInBits,
+                           getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+    I = 3;
+  } else if (N >= 3 && Expr.getElement(0) == dwarf::DW_OP_plus &&
+             Expr.getElement(2) == dwarf::DW_OP_deref) {
+    // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+    unsigned Offset = Expr.getElement(1);
+    ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+    I = 3;
+  } else if (N >= 1 && Expr.getElement(0) == dwarf::DW_OP_deref) {
+    // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+    ValidReg = AddMachineRegIndirect(MachineReg);
+    I = 1;
+  } else
+    ValidReg = AddMachineRegPiece(MachineReg);
+
+  if (!ValidReg)
+    return false;
+
+  // Emit remaining elements of the expression.
+  AddExpression(Expr, I);
+  return true;
+}
+
+void DwarfExpression::AddExpression(DIExpression Expr, unsigned I,
+                                    unsigned PieceOffsetInBits) {
+  unsigned N = Expr.getNumElements();
+  for (; I < N; ++I) {
+    switch (Expr.getElement(I)) {
+    case dwarf::DW_OP_piece: {
+      unsigned SizeOfByte = 8;
+      unsigned OffsetInBits = Expr.getElement(++I) * SizeOfByte;
+      unsigned SizeInBits = Expr.getElement(++I) * SizeOfByte;
+      AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+      break;
+    }
+    case dwarf::DW_OP_plus:
+      EmitOp(dwarf::DW_OP_plus_uconst);
+      EmitUnsigned(Expr.getElement(++I));
+      break;
+    case dwarf::DW_OP_deref:
+      EmitOp(dwarf::DW_OP_deref);
+      break;
+    default:
+      llvm_unreachable("unhandled opcode found in DIExpression");
+    }
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
new file mode 100644
index 000000000000..92e4d5d8a290
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -0,0 +1,133 @@
+//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Expressions ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class ByteStreamer;
+class TargetRegisterInfo;
+class DwarfUnit;
+class DIELoc;
+
+/// Base class containing the logic for constructing DWARF expressions
+/// independently of whether they are emitted into a DIE or into a .debug_loc
+/// entry.
+class DwarfExpression {
+protected:
+  const AsmPrinter &AP;
+  // Various convenience accessors that extract things out of AsmPrinter.
+  const TargetRegisterInfo *getTRI() const;
+  unsigned getDwarfVersion() const;
+
+public:
+  DwarfExpression(const AsmPrinter &AP) : AP(AP) {}
+  virtual ~DwarfExpression() {}
+
+  /// Output a dwarf operand and an optional assembler comment.
+  virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+  /// Emit a raw signed value.
+  virtual void EmitSigned(int Value) = 0;
+  /// Emit a raw unsigned value.
+  virtual void EmitUnsigned(unsigned Value) = 0;
+  /// Return whether the given machine register is the frame register in the
+  /// current function.
+  virtual bool isFrameRegister(unsigned MachineReg) = 0;
+
+  /// Emit a dwarf register operation.
+  void AddReg(int DwarfReg, const char *Comment = nullptr);
+  /// Emit an (optionally double-)indirect dwarf register operation.
+  void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+
+  /// Emit a dwarf register operation for describing
+  /// - a small value occupying only part of a register or
+  /// - a register representing only part of a value.
+  void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+  /// Emit a shift-right dwarf expression.
+  void AddShr(unsigned ShiftBy);
+
+  /// Emit an indirect dwarf register operation for the given machine register.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0);
+
+  /// \brief Emit a partial DWARF register operation.
+  /// \param MachineReg        the register
+  /// \param PieceSizeInBits   size and
+  /// \param PieceOffsetInBits offset of the piece in bits, if this is one
+  ///                          piece of an aggregate value.
+  ///
+  /// If size and offset are zero, an operation for the entire
+  /// register is emitted. Some targets do not provide a DWARF
+  /// register number for every register. If this is the case, this
+  /// function will attempt to emit a DWARF register by emitting a
+  /// piece of a super-register or by piecing together multiple
+  /// subregisters that alias the register.
+  ///
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
+                          unsigned PieceOffsetInBits = 0);
+
+  /// Emit a signed constant.
+  void AddSignedConstant(int Value);
+  /// Emit an unsigned constant.
+  void AddUnsignedConstant(unsigned Value);
+
+  /// Emit an entire DIExpression on top of a machine register location.
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegExpression(DIExpression Expr, unsigned MachineReg,
+                               unsigned PieceOffsetInBits = 0);
+  /// Emit the operations in a DIExpression, starting from element I.
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  void AddExpression(DIExpression Expr, unsigned I = 0,
+                     unsigned PieceOffsetInBits = 0);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression : public DwarfExpression {
+  ByteStreamer &BS;
+
+public:
+  DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
+      : DwarfExpression(AP), BS(BS) {}
+
+  void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+  void EmitSigned(int Value) override;
+  void EmitUnsigned(unsigned Value) override;
+  bool isFrameRegister(unsigned MachineReg) override;
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
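+///
+/// A minimal usage sketch (it mirrors DwarfUnit::addRegisterOpPiece from
+/// this patch; the caller attaches the DIELoc to a DIE afterwards):
+///   DIELoc *Loc = new (DIEValueAllocator) DIELoc();
+///   DIEDwarfExpression Expr(*Asm, *this, *Loc);
+///   Expr.AddMachineRegPiece(Reg);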
+class DIEDwarfExpression : public DwarfExpression { + DwarfUnit &DU; + DIELoc &DIE; + +public: + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) + : DwarfExpression(AP), DU(DU), DIE(DIE) {} + + void EmitOp(uint8_t Op, const char *Comment = nullptr) override; + void EmitSigned(int Value) override; + void EmitUnsigned(unsigned Value) override; + bool isFrameRegister(unsigned MachineReg) override; +}; +} + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 737ee54f94b5..549abf88324b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -8,18 +8,18 @@ //===----------------------------------------------------------------------===// #include "DwarfFile.h" - #include "DwarfDebug.h" #include "DwarfUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/LEB128.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) - : Asm(AP), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, + BumpPtrAllocator &DA) + : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,25 +48,18 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); const MCSection *USection = TheU->getSection(); Asm->OutStreamer.SwitchSection(USection); - // Emit the compile units header. - Asm->OutStreamer.EmitLabel(TheU->getLabelBegin()); - - // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); - Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize()); - TheU->emitHeader(ASectionSym); - DD->emitDIE(Die); - Asm->OutStreamer.EmitLabel(TheU->getLabelEnd()); + DD.emitDIE(Die); } } + // Compute the size and offset for each DIE. void DwarfFile::computeSizeAndOffsets() { // Offset from the first CU in the debug info section is 0 initially. @@ -149,8 +142,44 @@ void DwarfFile::emitAbbrevs(const MCSection *Section) { // Emit strings into a string section. void DwarfFile::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection, - const MCSymbol *StrSecSym) { - StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym); + const MCSection *OffsetSection) { + StrPool.emit(*Asm, StrSection, OffsetSection); +} + +void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { + SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; + DIVariable DV = Var->getVariable(); + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV.getArgNumber()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. 
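+    // For example, with Vars == [arg1, arg3, local] and ArgNum == 2, the
+    // scan below stops at arg3 and yields [arg1, arg2, arg3, local].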
+    auto I = Vars.begin();
+    while (I != Vars.end()) {
+      unsigned CurNum = (*I)->getVariable().getArgNumber();
+      // A local (non-parameter) variable has been found, insert immediately
+      // before it.
+      if (CurNum == 0)
+        break;
+      // A later indexed parameter has been found, insert immediately before it.
+      if (CurNum > ArgNum)
+        break;
+      // FIXME: There are still some cases where two inlined functions are
+      // conflated together (two calls to the same function at the same
+      // location (eg: via a macro, or without column info, etc)) and then
+      // their arguments are conflated as well.
+      assert((LS->getParent() || CurNum != ArgNum) &&
+             "Duplicate argument for top level (non-inlined) function");
+      ++I;
+    }
+    Vars.insert(I, Var);
+    return;
+  }
+
+  Vars.push_back(Var);
+}
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 3985eb28cd6d..f14d673bf6ac 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -7,27 +7,29 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__
-#define CODEGEN_ASMPRINTER_DWARFFILE_H__
+#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
+#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H
 
+#include "AddressPool.h"
+#include "DwarfStringPool.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Allocator.h"
-#include "AddressPool.h"
-#include "DwarfStringPool.h"
-
-#include <vector>
-#include <string>
 #include <memory>
+#include <string>
+#include <vector>
 
 namespace llvm {
 class AsmPrinter;
+class DbgVariable;
 class DwarfUnit;
 class DIEAbbrev;
 class MCSymbol;
 class DIE;
+class DISubprogram;
+class LexicalScope;
 class StringRef;
 class DwarfDebug;
 class MCSection;
@@ -35,6 +37,8 @@ class DwarfFile {
   // Target of Dwarf emission, used for sizing of abbreviations.
   AsmPrinter *Asm;
 
+  DwarfDebug &DD;
+
   // Used to uniquely define abbreviations.
   FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -46,8 +50,20 @@ class DwarfFile {
 
   DwarfStringPool StrPool;
 
+  // Collection of dbg variables of a scope.
+  DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables;
+
+  // Collection of abstract subprogram DIEs.
+  DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+  /// Maps MDNodes of the type system to the corresponding DIEs. These DIEs
+  /// can be shared across CUs, that is why we keep the map here instead
+  /// of in DwarfCompileUnit.
+  DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
+
 public:
-  DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+  DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
+            BumpPtrAllocator &DA);
 
   ~DwarfFile();
@@ -67,18 +83,34 @@ public:
 
   /// \brief Emit all of the units to the section listed with the given
   /// abbreviation section.
-  void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+  void emitUnits(const MCSymbol *ASectionSym);
 
   /// \brief Emit a set of abbreviations to the specific section.
   void emitAbbrevs(const MCSection *);
 
   /// \brief Emit all of the strings to the section given.
   void emitStrings(const MCSection *StrSection,
-                   const MCSection *OffsetSection = nullptr,
-                   const MCSymbol *StrSecSym = nullptr);
+                   const MCSection *OffsetSection = nullptr);
 
   /// \brief Returns the string pool.
DwarfStringPool &getStringPool() { return StrPool; } + + void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + + DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { + return ScopeVariables; + } + + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { + return AbstractSPDies; + } + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } }; } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 72cab60e2321..d76b66cac69f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -12,14 +12,11 @@ using namespace llvm; -MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; } - static std::pair<MCSymbol *, unsigned> & getEntry(AsmPrinter &Asm, StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool, StringRef Prefix, StringRef Str) { - std::pair<MCSymbol *, unsigned> &Entry = - Pool.GetOrCreateValue(Str).getValue(); + std::pair<MCSymbol *, unsigned> &Entry = Pool[Str]; if (!Entry.first) { Entry.second = Pool.size() - 1; Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); @@ -36,8 +33,7 @@ unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) { } void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection, - const MCSymbol *StrSecSym) { + const MCSection *OffsetSection) { if (Pool.empty()) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index c1615fb4a297..63e34123d858 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -7,13 +7,12 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__ -#define CODEGEN_ASMPRINTER_STRINGPOOL_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Allocator.h" - #include <utility> namespace llvm { @@ -28,18 +27,13 @@ class StringRef; class DwarfStringPool { StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool; StringRef Prefix; - MCSymbol *SectionSymbol; public: DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) - : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {} + : Pool(A), Prefix(Prefix) {} void emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection = nullptr, - const MCSymbol *StrSecSym = nullptr); - - /// \brief Returns the entry into the start of the pool. - MCSymbol *getSectionSymbol(); + const MCSection *OffsetSection = nullptr); /// \brief Returns an entry into the string pool with the given /// string text. 
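With the per-pool section symbol removed above, a unit that wants an indexed
string goes through the pool directly. A minimal sketch, assuming the
DwarfFile/DwarfStringPool interfaces introduced in this patch (the attribute,
form, and string shown are illustrative, not taken from a caller in this
diff):

  // Request (or create) the pool entry and remember its index.
  unsigned Idx = DU->getStringPool().getIndex(*Asm, "example");
  // Wrap the index in a DIEInteger and attach it as an indexed string.
  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Idx);
  DIEValue *Str = new (DIEValueAllocator) DIEString(Value, "example");
  Die.addValue(dwarf::DW_AT_name, dwarf::DW_FORM_GNU_str_index, Str);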
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 9538bee7bca8..3d3da2ac1d31 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -13,7 +13,9 @@ #include "DwarfUnit.h" #include "DwarfAccelTable.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" +#include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" @@ -30,6 +32,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -40,24 +43,30 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, cl::desc("Generate DWARF4 type units."), cl::init(false)); +void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { + DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); +} +void DIEDwarfExpression::EmitSigned(int Value) { + DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); +} +void DIEDwarfExpression::EmitUnsigned(unsigned Value) { + DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); +} +bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { + return MachineReg == getTRI()->getFrameRegister(*AP.MF); +} + + /// Unit - Unit constructor. DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A), - DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr), - Skeleton(nullptr) { + DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == dwarf::DW_TAG_type_unit); DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } -DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) { - insertDIE(Node, &getUnitDie()); -} - DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable) @@ -146,7 +155,7 @@ static bool isShareableAcrossCUs(DIDescriptor D) { /// will be kept in DwarfDebug for shareable DIEs. DIE *DwarfUnit::getDIE(DIDescriptor D) const { if (isShareableAcrossCUs(D)) - return DD->getDIE(D); + return DU->getDIE(D); return MDNodeToDieMap.lookup(D); } @@ -155,7 +164,7 @@ DIE *DwarfUnit::getDIE(DIDescriptor D) const { /// will be kept in DwarfDebug for shareable DIEs. void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { - DD->insertDIE(Desc, D); + DU->insertDIE(Desc, D); return; } MDNodeToDieMap.insert(std::make_pair(Desc, D)); @@ -206,10 +215,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, /// table. 
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { - - if (!DD->useSplitDwarf()) + if (!isDwoUnit()) return addLocalString(Die, Attribute, String); + addIndexedString(Die, Attribute, String); +} + +void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { unsigned idx = DU->getStringPool().getIndex(*Asm, String); DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); @@ -224,31 +237,12 @@ void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute, DIEValue *Value; if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); - else { - MCSymbol *StringPool = DU->getStringPool().getSectionSymbol(); - Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); - } + else + Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym()); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); Die.addValue(Attribute, dwarf::DW_FORM_strp, Str); } -/// addExpr - Add a Dwarf expression attribute data and value. -/// -void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { - DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); - Die.addValue((dwarf::Attribute)0, Form, Value); -} - -/// addLocationList - Add a Dwarf loclistptr attribute data and value. -/// -void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, - unsigned Index) { - DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); - dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4; - Die.addValue(Attribute, Form, Value); -} - /// addLabel - Add a Dwarf label attribute data and value. /// void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, @@ -261,16 +255,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { addLabel(Die, (dwarf::Attribute)0, Form, Label); } -/// addSectionLabel - Add a Dwarf section label attribute data and value. -/// -void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label) { - if (DD->getDwarfVersion() >= 4) - addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); - else - addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); -} - /// addSectionOffset - Add an offset into a section attribute data and value. /// void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, @@ -281,45 +265,6 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); } -/// addLabelAddress - Add a dwarf label attribute data and value using -/// DW_FORM_addr or DW_FORM_GNU_addr_index. -/// -void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label) { - - if (!DD->useSplitDwarf()) - return addLocalLabelAddress(Die, Attribute, Label); - - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - - unsigned idx = DD->getAddressPool().getIndex(Label); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); -} - -void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, - dwarf::Attribute Attribute, - const MCSymbol *Label) { - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - - Die.addValue(Attribute, dwarf::DW_FORM_addr, - Label ? 
(DIEValue *)new (DIEValueAllocator) DIELabel(Label) - : new (DIEValueAllocator) DIEInteger(0)); -} - -unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { - // If we print assembly, we can't separate .file entries according to - // compile units. Thus all files will belong to the default compile unit. - - // FIXME: add a better feature test than hasRawTextSupport. Even better, - // extend .file to support this. - return Asm->OutStreamer.EmitDwarfFileDirective( - 0, DirName, FileName, - Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); -} - unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) : getCU().getOrCreateSourceID(FileName, DirName); @@ -339,16 +284,6 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { } } -/// addSectionDelta - Add a section label delta attribute data and value. -/// -void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Value); -} - void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); @@ -477,137 +412,19 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); } -/// addVariableAddress - Add DW_AT_location attribute for a -/// DbgVariable based on provided MachineLocation. -void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, - MachineLocation Location) { - if (DV.variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV.isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location, - DV.getVariable().isIndirect()); -} - /// addRegisterOp - Add register operand. -void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - int DWReg = RI->getDwarfRegNum(Reg, false); - bool isSubRegister = DWReg < 0; - - unsigned Idx = 0; - - // Go up the super-register chain until we hit a valid dwarf register number. - for (MCSuperRegIterator SR(Reg, RI); SR.isValid() && DWReg < 0; ++SR) { - DWReg = RI->getDwarfRegNum(*SR, false); - if (DWReg >= 0) - Idx = RI->getSubRegIndex(*SR, Reg); - } - - if (DWReg < 0) { - DEBUG(dbgs() << "Invalid Dwarf register number.\n"); - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_nop); - return; - } - - // Emit register - if (DWReg < 32) - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); - else { - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); - } - - // Emit Mask - if (isSubRegister) { - unsigned Size = RI->getSubRegIdxSize(Idx); - unsigned Offset = RI->getSubRegIdxOffset(Idx); - if (Offset > 0) { - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, Size); - addUInt(TheDie, dwarf::DW_FORM_data1, Offset); - } else { - unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. 
- addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize); - } - } +bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits, unsigned OffsetInBits) { + DIEDwarfExpression Expr(*Asm, *this, TheDie); + Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits); + return true; } /// addRegisterOffset - Add register offset. -void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, +bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned DWReg = RI->getDwarfRegNum(Reg, false); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (Reg == TRI->getFrameRegister(*Asm->MF)) - // If variable offset is based in frame register then use fbreg. - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - else if (DWReg < 32) - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); - else { - addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); - } - addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - - if (Location.isReg() && !Indirect) - addRegisterOp(*Loc, Location.getReg()); - else { - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - if (Indirect && !Location.isReg()) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DbgVariable, starting -/// from the starting location. Add the DWARF information to the die. -/// -void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (Location.isReg()) { - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); - i = 2; - } else - addRegisterOp(*Loc, Location.getReg()); - } else - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - if (!Location.isReg()) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else - llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. 
- addBlock(Die, Attribute, Loc); + DIEDwarfExpression Expr(*Asm, *this, TheDie); + return Expr.AddMachineRegIndirect(Reg, Offset); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -690,7 +507,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Find the __forwarding field and the variable field in the __Block_byref // struct. - DIArray Fields = blockStruct.getTypeArray(); + DIArray Fields = blockStruct.getElements(); DIDerivedType varField; DIDerivedType forwardingField; @@ -711,10 +528,14 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // variable's location. DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + bool validReg; if (Location.isReg()) - addRegisterOp(*Loc, Location.getReg()); + validReg = addRegisterOpPiece(*Loc, Location.getReg()); else - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + if (!validReg) + return; // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). @@ -752,13 +573,18 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { dwarf::Tag T = (dwarf::Tag)Ty.getTag(); // Encode pointer constants as unsigned bytes. This is used at least for // null pointer constant emission. + // (Pieces of) aggregate types that get hacked apart by SROA may also be + // represented by a constant. Encode them as unsigned bytes. // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed // here, but accept them for now due to a bug in SROA producing bogus // dbg.values. - if (T == dwarf::DW_TAG_pointer_type || + if (T == dwarf::DW_TAG_array_type || + T == dwarf::DW_TAG_class_type || + T == dwarf::DW_TAG_pointer_type || T == dwarf::DW_TAG_ptr_to_member_type || T == dwarf::DW_TAG_reference_type || - T == dwarf::DW_TAG_rvalue_reference_type) + T == dwarf::DW_TAG_rvalue_reference_type || + T == dwarf::DW_TAG_structure_type) return true; assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || T == dwarf::DW_TAG_volatile_type || @@ -779,11 +605,14 @@ static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { Encoding == dwarf::DW_ATE_unsigned_char || Encoding == dwarf::DW_ATE_signed || Encoding == dwarf::DW_ATE_signed_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) && + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + (Ty.getTag() == dwarf::DW_TAG_unspecified_type && + Ty.getName() == "decltype(nullptr)")) && "Unsupported encoding"); return (Encoding == dwarf::DW_ATE_unsigned || Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean); + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty.getTag() == dwarf::DW_TAG_unspecified_type); } /// If this type is derived from a base type then return base type size. @@ -1002,11 +831,9 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; DD->addAccelType(Ty.getName(), TyDIE, Flags); - if ((!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) && - getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly) - GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = - &TyDIE; + if (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace()) + addGlobalType(Ty, TyDIE, Context); } } @@ -1031,14 +858,6 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { addDIEEntry(Entity, Attribute, Entry); } -/// addGlobalName - Add a new global name to the compile unit. -void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) { - if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly) - return; - std::string FullName = getParentContextString(Context) + Name.str(); - GlobalNames[FullName] = &Die; -} - /// getParentContextString - Walks the metadata parent chain in a language /// specific manner (using the compile unit language) and returns /// it as a string. This is done at the metadata level because DIEs may @@ -1117,7 +936,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { addString(Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) - if (Size && Tag != dwarf::DW_TAG_pointer_type) + if (Size && Tag != dwarf::DW_TAG_pointer_type + && Tag != dwarf::DW_TAG_ptr_to_member_type) addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) @@ -1129,16 +949,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { } /// constructSubprogramArguments - Construct function argument DIEs. -void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Args.getElement(i); - if (Ty.isUnspecifiedParameter()) { + DIType Ty = resolve(Args.getElement(i)); + if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); - addType(Arg, DIType(Ty)); - if (DIType(Ty).isArtificial()) + addType(Arg, Ty); + if (Ty.isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } @@ -1161,14 +981,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { break; case dwarf::DW_TAG_subroutine_type: { // Add return type. A void return won't have a type. - DIArray Elements = CTy.getTypeArray(); - DIType RTy(Elements.getElement(0)); + DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); + DIType RTy(resolve(Elements.getElement(0))); if (RTy) addType(Buffer, RTy); bool isPrototyped = true; if (Elements.getNumElements() == 2 && - Elements.getElement(1).isUnspecifiedParameter()) + !Elements.getElement(1)) isPrototyped = false; constructSubprogramArguments(Buffer, Elements); @@ -1191,7 +1011,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. 
- DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.isSubprogram()) @@ -1250,6 +1070,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addFlag(Buffer, dwarf::DW_AT_APPLE_block); + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type + // inside C++ composite types to point to the base class with the vtable. DICompositeType ContainingType(resolve(CTy.getContainingType())); if (ContainingType) addDIEEntry(Buffer, dwarf::DW_AT_containing_type, @@ -1327,10 +1149,10 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer, addType(ParamDIE, resolve(VP.getType())); if (!VP.getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); - if (Value *Val = VP.getValue()) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) + if (Metadata *Val = VP.getValue()) { + if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) addConstantValue(ParamDIE, CI, resolve(VP.getType())); - else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { + else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { // For declaration non-type template parameters (such as global values and // functions) DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -1373,20 +1195,23 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { } /// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). - DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *ContextDIE = + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); if (DIE *SPDie = getDIE(SP)) return SPDie; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { - // Add subprogram definitions to the CU die directly. - ContextDIE = &getUnitDie(); - // Build the decl now to ensure it precedes the definition. - getOrCreateSubprogramDIE(SPDecl); + if (!Minimal) { + // Add subprogram definitions to the CU die directly. + ContextDIE = &getUnitDie(); + // Build the decl now to ensure it precedes the definition. + getOrCreateSubprogramDIE(SPDecl); + } } // DW_TAG_inlined_subroutine may refer to this DIE. @@ -1401,14 +1226,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return &SPDie; } -void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext()); - applySubprogramAttributes(SP, SPDie); - addGlobalName(SP.getName(), SPDie, Context); -} - -void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { +bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, + DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { @@ -1431,17 +1250,29 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); - if (DeclDie) { - // Refer to the function declaration where all the other attributes will be - // found. 
- addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); - return; - } + if (!DeclDie) + return false; + + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); + return true; +} + +void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + bool Minimal) { + if (!Minimal) + if (applySubprogramDefinitionAttributes(SP, SPDie)) + return; // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); + // Skip the rest of the attributes under -gmlt to save space. + if (Minimal) + return; + addSourceLine(SPDie, SP); // Add the prototype if we have a prototype and we have a C like @@ -1452,15 +1283,15 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DICompositeType SPTy = SP.getType(); + DISubroutineType SPTy = SP.getType(); assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DIArray Args = SPTy.getTypeArray(); + DITypeArray Args = SPTy.getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. - if (Args.getElement(0)) - addType(SPDie, DIType(Args.getElement(0))); + if (resolve(Args.getElement(0))) + addType(SPDie, DIType(resolve(Args.getElement(0)))); unsigned VK = SP.getVirtuality(); if (VK) { @@ -1506,7 +1337,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { else if (SP.isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else + else if (SP.isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); @@ -1514,184 +1345,6 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { addFlag(SPDie, dwarf::DW_AT_explicit); } -void DwarfUnit::applyVariableAttributes(const DbgVariable &Var, - DIE &VariableDie) { - StringRef Name = Var.getName(); - if (!Name.empty()) - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, Var.getVariable()); - addType(VariableDie, Var.getType()); - if (Var.isArtificial()) - addFlag(VariableDie, dwarf::DW_AT_artificial); -} - -// Return const expression if value is a GEP to access merged global -// constant. e.g. -// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) -static const ConstantExpr *getMergedGlobalExpr(const Value *V) { - const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); - if (!CE || CE->getNumOperands() != 3 || - CE->getOpcode() != Instruction::GetElementPtr) - return nullptr; - - // First operand points to a global struct. - Value *Ptr = CE->getOperand(0); - if (!isa<GlobalValue>(Ptr) || - !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) - return nullptr; - - // Second operand is zero. - const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); - if (!CI || !CI->isZero()) - return nullptr; - - // Third operand is offset. - if (!isa<ConstantInt>(CE->getOperand(2))) - return nullptr; - - return CE; -} - -/// createGlobalVariableDIE - create global variable DIE. -void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { - // Check for pre-existence. 
- if (getDIE(GV)) - return; - - assert(GV.isGlobalVariable()); - - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); - - // If this is a static data member definition, some attributes belong - // to the declaration DIE. - DIE *VariableDIE = nullptr; - bool IsStaticMember = false; - DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); - if (SDMDecl.Verify()) { - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - // We need the declaration DIE that is in the static member's class. - VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); - IsStaticMember = true; - } - - // If this is not a static data member definition, create the variable - // DIE and add the initial set of attributes to it. - if (!VariableDIE) { - // Construct the context before querying for the existence of the DIE in - // case such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(GVContext); - - // Add to map. - VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); - - // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); - addType(*VariableDIE, GTy); - - // Add scoping info. - if (!GV.isLocalToUnit()) - addFlag(*VariableDIE, dwarf::DW_AT_external); - - // Add line number info. - addSourceLine(*VariableDIE, GV); - } - - // Add location. - bool addToAccelTable = false; - DIE *VariableSpecDIE = nullptr; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { - addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); - } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); - } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); - } else { - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - } - // Do not create specification DIE if context is either compile unit - // or a subprogram. - if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { - // Create specification DIE. - VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie); - addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE); - addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc); - // A static member's declaration is already flagged as such. - if (!SDMDecl.Verify()) - addFlag(*VariableDIE, dwarf::DW_AT_declaration); - } else { - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - } - // Add the linkage name. 
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE
- : *VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
- // AT_const_value was added when the static member was created. To avoid
- // emitting AT_const_value multiple times, we only add AT_const_value when
- // it is not a static member.
- if (!IsStaticMember)
- addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- Value *Ptr = CE->getOperand(0);
- MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- }
-
- if (addToAccelTable) {
- DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE;
- DD->addAccelName(GV.getName(), AddrDIE);
-
- // If the linkage name is different than the name, go ahead and output
- // that as well into the name table.
- if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- DD->addAccelName(GV.getLinkageName(), AddrDIE);
- }
-
- addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
- GV.getContext());
-}
-
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
 void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
 DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
@@ -1700,9 +1353,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
 // The LowerBound value defines the lower bound, which is typically zero for
 // C/C++. The Count value is the number of elements. Values are 64 bit. If
 // Count == -1 then the array is unbounded and we do not emit
- // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
- // Count == 0, then the array has zero elements in which case we do not emit
- // an upper bound.
+ // DW_AT_lower_bound and DW_AT_count attributes.
 int64_t LowerBound = SR.getLo();
 int64_t DefaultLowerBound = getDefaultLowerBound();
 int64_t Count = SR.getCount();
@@ -1710,11 +1361,22 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
 if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
 addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound);
- if (Count != -1 && Count != 0)
+ if (Count != -1)
 // FIXME: An unbounded array should reference the expression that defines
 // the array.
- addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None,
- LowerBound + Count - 1);
+ addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count);
+}
+
+DIE *DwarfUnit::getIndexTyDie() {
+ if (IndexTyDie)
+ return IndexTyDie;
+ // Construct an integer type to use for indexes.
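A self-contained illustration of the subrange change above (assumptions: C semantics with a default lower bound of 0; the names here are mine, not the patch's):

#include <cassert>
#include <cstdint>

// Mirrors the new rule in constructSubrangeDIE: emit DW_AT_count unless the
// array is unbounded. The old Count == 0 special case is gone, so zero-length
// arrays now get an explicit DW_AT_count of 0.
static bool emitsCount(int64_t Count, int64_t &CountAttr) {
  if (Count == -1) // unbounded: no bound attributes at all
    return false;
  CountAttr = Count;
  return true;
}

int main() {
  int64_t C;
  assert(emitsCount(10, C) && C == 10); // int A[10]: count 10, was bound 9
  assert(emitsCount(0, C) && C == 0);   // zero-length array: now emitted
  assert(!emitsCount(-1, C));           // int B[]: unbounded, nothing emitted
}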
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_unsigned); + return IndexTyDie; } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. @@ -1729,18 +1391,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { // FIXME: This type should be passed down from the front end // as different languages may have different sizes for indexes. DIE *IdxTy = getIndexTyDie(); - if (!IdxTy) { - // Construct an integer type to use for indexes. - IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); - addString(*IdxTy, dwarf::DW_AT_name, "sizetype"); - addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); - addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_unsigned); - setIndexTyDie(IdxTy); - } // Add subranges to array type. - DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) @@ -1750,7 +1403,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { @@ -1788,68 +1441,6 @@ void DwarfUnit::constructContainingTypeDIEs() { } } -/// constructVariableDIE - Construct a DIE for the given DbgVariable. -std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV, - bool Abstract) { - auto D = constructVariableDIEImpl(DV, Abstract); - DV.setDIE(*D); - return D; -} - -std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, - bool Abstract) { - // Define variable debug information entry. - auto VariableDie = make_unique<DIE>(DV.getTag()); - - if (Abstract) { - applyVariableAttributes(DV, *VariableDie); - return VariableDie; - } - - // Add variable address. - - unsigned Offset = DV.getDotDebugLocOffset(); - if (Offset != ~0U) { - addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); - return VariableDie; - } - - // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV.getMInsn()) { - assert(DVInsn->getNumOperands() == 3); - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - // If the second operand is an immediate, this is an indirect value. - if (DVInsn->getOperand(1).isImm()) { - MachineLocation Location(RegOp.getReg(), - DVInsn->getOperand(1).getImm()); - addVariableAddress(DV, *VariableDie, Location); - } else if (RegOp.getReg()) - addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); - } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); - else if (DVInsn->getOperand(0).isFPImm()) - addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), - DV.getType()); - - return VariableDie; - } - - // .. else use frame index. 
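The lazy construction added above replaces the inline build deleted from constructArrayTypeDIE in the same chunk, so every array type in the unit now shares one cached index-type DIE. A stripped-down, runnable sketch of the pattern (stand-in types only):

struct DIE { int Tag; };

struct UnitSketch {
  DIE *IndexTyDie = nullptr;
  DIE Storage{};

  DIE *getIndexTyDie() {
    if (!IndexTyDie) {    // built once, on the first array type encountered
      Storage.Tag = 0x24; // DW_TAG_base_type ("sizetype" in the real code)
      IndexTyDie = &Storage;
    }
    return IndexTyDie;    // later callers get the cached DIE
  }
};

int main() {
  UnitSketch U;
  return U.getIndexTyDie() == U.getIndexTyDie() ? 0 : 1; // same DIE both times
}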
- int FI = DV.getFrameIndex(); - if (FI != ~0) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, *VariableDie, Location); - } - - return VariableDie; -} - /// constructMemberDIE - Construct member DIE from DIDerivedType. void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); @@ -1922,7 +1513,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else + else if (DT.isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) @@ -1971,7 +1562,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { else if (DT.isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else + else if (DT.isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); @@ -1984,6 +1575,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { } void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { + // Emit size of content not including length itself + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); + Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); @@ -1999,50 +1594,9 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::addRange(RangeSpan Range) { - // Only add a range for this unit if we're emitting full debug. - if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) { - // If we have no current ranges just add the range and return, otherwise, - // check the current section and CU against the previous section and CU we - // emitted into and the subprogram was contained within. If these are the - // same then extend our current range, otherwise add this as a new range. - if (CURanges.size() == 0 || - this != DD->getPrevCU() || - Asm->getCurrentSection() != DD->getPrevSection()) { - CURanges.push_back(Range); - return; - } - - assert(&(CURanges.back().getEnd()->getSection()) == - &(Range.getEnd()->getSection()) && - "We can only append to a range in the same section!"); - CURanges.back().setEnd(Range.getEnd()); - } -} - -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym = - Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); - - stmtListIndex = UnitDie.getValues().size(); - - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. 
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym); - else - addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); -} - -void DwarfCompileUnit::applyStmtList(DIE &D) { - D.addValue(dwarf::DW_AT_stmt_list, - UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), - UnitDie.getValues()[stmtListIndex]); +void DwarfUnit::initSection(const MCSection *Section) { + assert(!this->Section); + this->Section = Section; } void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { @@ -2055,16 +1609,8 @@ void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { sizeof(Ty->getOffset())); } -void DwarfTypeUnit::initSection(const MCSection *Section) { - assert(!this->Section); - this->Section = Section; - // Since each type unit is contained in its own COMDAT section, the begin - // label and the section label are the same. Using the begin label emission in - // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but - // the only other alternative of lazily constructing start-of-section labels - // and storing a mapping in DwarfDebug (or AsmPrinter). - this->SectionSym = this->LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); - this->LabelEnd = - Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); +bool DwarfTypeUnit::isDwoUnit() const { + // Since there are no skeleton type units, all type units are dwo type units + // when split DWARF is being used. + return DD->useSplitDwarf(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index b7b83b282d82..7a5e47ddf9dd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -11,20 +11,20 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H -#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H -#include "DIE.h" #include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCDwarf.h" namespace llvm { @@ -55,7 +55,8 @@ private: SmallVector<RangeSpan, 2> Ranges; public: - RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {} + RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges) + : RangeSym(Sym), Ranges(std::move(Ranges)) {} MCSymbol *getSym() const { return RangeSym; } const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } void addRange(RangeSpan Range) { Ranges.push_back(Range); } @@ -96,12 +97,6 @@ protected: /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; - /// GlobalNames - A map of globally visible named entities for this unit. - StringMap<const DIE *> GlobalNames; - - /// GlobalTypes - A map of globally visible types for this unit. - StringMap<const DIE *> GlobalTypes; - /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -113,13 +108,6 @@ protected: /// corresponds to the MDNode mapped with the subprogram DIE. 
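A usage sketch for the two-argument RangeSpanList constructor introduced above. The label name, the index, and the begin/end symbols are illustrative assumptions, as is RangeSpan's two-symbol constructor:

SmallVector<RangeSpan, 2> Spans;
Spans.push_back(RangeSpan(FuncBeginSym, FuncEndSym));
// Ranges are built up front and moved into the list wholesale; the old
// pattern of default-constructing a RangeSpanList and appending through
// addRange() is no longer how lists are formed.
RangeSpanList List(Asm->GetTempSymbol("debug_ranges", RangeListIndex),
                   std::move(Spans));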
DenseMap<DIE *, const MDNode *> ContainingTypeMap; - // List of ranges for a given compile unit. - SmallVector<RangeSpan, 1> CURanges; - - // List of range lists for a given compile unit, separate from the ranges for - // the CU itself. - SmallVector<RangeSpanList, 1> CURangeLists; - // DIEValueAllocator - All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; @@ -129,86 +117,32 @@ protected: /// The section this unit will be emitted in. const MCSection *Section; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; + DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); - /// The start of the unit within its section. - MCSymbol *LabelBegin; + void initSection(const MCSection *Section); - /// The end of the unit within its section. - MCSymbol *LabelEnd; + /// Add a string attribute data and value. + void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// Skeleton unit associated with this unit. - DwarfUnit *Skeleton; + void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie); public: virtual ~DwarfUnit(); - /// Set the skeleton unit associated with this unit. - void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; } - - /// Get the skeleton unit associated with this unit. - DwarfUnit *getSkeleton() const { return Skeleton; } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - assert(!this->Section); - this->Section = Section; - this->SectionSym = SectionSym; - this->LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); - this->LabelEnd = - Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); - } - const MCSection *getSection() const { assert(Section); return Section; } - /// If there's a skeleton then return the section symbol for the skeleton - /// unit, otherwise return the section symbol for this unit. - MCSymbol *getLocalSectionSym() const { - if (Skeleton) - return Skeleton->getSectionSym(); - return getSectionSym(); - } - - MCSymbol *getSectionSym() const { - assert(Section); - return SectionSym; - } - - /// If there's a skeleton then return the begin label for the skeleton unit, - /// otherwise return the local label for this unit. - MCSymbol *getLocalLabelBegin() const { - if (Skeleton) - return Skeleton->getLabelBegin(); - return getLabelBegin(); - } - - MCSymbol *getLabelBegin() const { - assert(Section); - return LabelBegin; - } - - MCSymbol *getLabelEnd() const { - assert(Section); - return LabelEnd; - } - // Accessors. 
+ AsmPrinter* getAsmPrinter() const { return Asm; } unsigned getUniqueID() const { return UniqueID; } uint16_t getLanguage() const { return CUNode.getLanguage(); } DICompileUnit getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } - const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } - const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } @@ -216,29 +150,15 @@ public: /// hasContent - Return true if this compile unit has something to write out. bool hasContent() const { return !UnitDie.getChildren().empty(); } - /// addRange - Add an address range to the list of ranges for this unit. - void addRange(RangeSpan Range); - - /// getRanges - Get the list of ranges for this unit. - const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } - SmallVectorImpl<RangeSpan> &getRanges() { return CURanges; } - - /// addRangeList - Add an address range list to the list of range lists. - void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); } - - /// getRangeLists - Get the vector of range lists. - const SmallVectorImpl<RangeSpanList> &getRangeLists() const { - return CURangeLists; - } - SmallVectorImpl<RangeSpanList> &getRangeLists() { return CURangeLists; } - /// getParentContextString - Get a string containing the language specific /// context for a global name. std::string getParentContextString(DIScope Context) const; - /// addGlobalName - Add a new global entity to the compile unit. - /// - void addGlobalName(StringRef Name, DIE &Die, DIScope Context); + /// Add a new global name to the compile unit. + virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {} + + /// Add a new global type to the compile unit. + virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {} /// addAccelNamespace - Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); @@ -275,14 +195,7 @@ public: void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. - void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str); - - /// addLocalString - Add a string attribute data and value. - void addLocalString(DIE &Die, dwarf::Attribute Attribute, - const StringRef Str); - - /// addExpr - Add a Dwarf expression attribute data and value. - void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, @@ -290,14 +203,6 @@ public: void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// addLocationList - Add a Dwarf loclistptr attribute data and value. - void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); - - /// addSectionLabel - Add a Dwarf section label attribute data and value. - /// - void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - /// addSectionOffset - Add an offset into a section attribute data and value. /// void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); @@ -306,10 +211,6 @@ public: /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. 
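A sketch of how the new no-op virtuals above are meant to be overridden. This is an assumption on my part, consistent with the GlobalNames/GlobalTypes maps deleted from this class earlier in the diff: the compile unit takes those maps over, while type units inherit the empty defaults and contribute nothing to pubnames/pubtypes.

class DwarfCompileUnit : public DwarfUnit {
  StringMap<const DIE *> GlobalNames;
  StringMap<const DIE *> GlobalTypes;

public:
  // Only compile units feed the global name/type tables.
  void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override {
    GlobalNames[getParentContextString(Context) + Name.str()] = &Die;
  }
  void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override {
    GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = &Die;
  }
};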
void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// addSectionDelta - Add a label delta attribute data and value. - void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, - const MCSymbol *Lo); - /// addLabelDelta - Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); @@ -339,11 +240,6 @@ public: void addSourceLine(DIE &Die, DINameSpace NS); void addSourceLine(DIE &Die, DIObjCProperty Ty); - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect = false); - /// addConstantValue - Add constant value entry in variable DIE. void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); @@ -358,19 +254,16 @@ public: /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); - /// addRegisterOp - Add register operand. - void addRegisterOp(DIELoc &TheDie, unsigned Reg); + /// \brief Add register operand. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. + bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - void addComplexAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location); + /// \brief Add register offset. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. + bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); // FIXME: Should be reformulated in terms of addComplexAddress. /// addBlockByrefAddress - Start with the address based on the location @@ -382,11 +275,6 @@ public: dwarf::Attribute Attribute, const MachineLocation &Location); - /// addVariableAddress - Add DW_AT_location attribute for a - /// DbgVariable based on provided MachineLocation. - void addVariableAddress(const DbgVariable &DV, DIE &Die, - MachineLocation Location); - /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. @@ -397,11 +285,10 @@ public: DIE *getOrCreateNameSpace(DINameSpace NS); /// getOrCreateSubprogramDIE - Create new DIE using SP. - DIE *getOrCreateSubprogramDIE(DISubprogram SP); + DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false); - void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); - void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); - void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + void applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + bool Minimal = false); /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. @@ -417,12 +304,8 @@ public: /// vtables. void constructContainingTypeDIEs(); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. 
- std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV, - bool Abstract = false); - /// constructSubprogramArguments - Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DIArray Args); + void constructSubprogramArguments(DIE &Buffer, DITypeArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. @@ -453,12 +336,13 @@ protected: /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; -private: - /// \brief Construct a DIE for the given DbgVariable without initializing the - /// DbgVariable's DIE reference. - std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV, - bool Abstract); + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } +private: /// constructTypeDIE - Construct basic type die from DIBasicType. void constructTypeDIE(DIE &Buffer, DIBasicType BTy); @@ -503,7 +387,7 @@ private: } // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { return IndexTyDie; } + DIE *getIndexTyDie(); // setIndexTyDie - Set D as anonymous type for index which can be reused // later. @@ -513,56 +397,22 @@ private: /// information entry. DIEEntry *createDIEEntry(DIE &Entry); - /// resolve - Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. - template <typename T> T resolve(DIRef<T> Ref) const { - return DD->resolve(Ref); - } - /// If this is a named finished type then include it in the list of types for /// the accelerator tables. void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); -}; - -class DwarfCompileUnit : public DwarfUnit { - /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding - /// the need to search for it in applyStmtList. - unsigned stmtListIndex; - -public: - DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); - - void initStmtList(MCSymbol *DwarfLineSectionSym); - - /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. - void applyStmtList(DIE &D); - - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(DIGlobalVariable GV); - - /// addLabelAddress - Add a dwarf label attribute data and value using - /// either DW_FORM_addr or DW_FORM_GNU_addr_index. - void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - /// addLocalLabelAddress - Add a dwarf label attribute data and value using - /// DW_FORM_addr only. 
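A usage note for the resolve<>() helper relocated in the hunk above: a DIRef<T> may hold either the node itself or a string type identifier (used for ODR-uniqued types), so it must be resolved through the map DwarfDebug owns before the node can be inspected. For example (illustrative):

DITypeRef Ref = DT.getTypeDerivedFrom(); // may be an MDString identifier
DIType Ty = resolve(Ref);                // concrete DIType, safe to inspect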
- void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - - DwarfCompileUnit &getCU() override { return *this; } - - unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + virtual bool isDwoUnit() const = 0; }; class DwarfTypeUnit : public DwarfUnit { -private: uint64_t TypeSignature; const DIE *Ty; DwarfCompileUnit &CU; MCDwarfDwoLineTable *SplitLineTable; + unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; + bool isDwoUnit() const override; + public: DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, @@ -578,11 +428,8 @@ public: return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - void initSection(const MCSection *Section); + using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } - -protected: - unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; }; } // end llvm namespace #endif diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 73f62bfd804e..1bc86f6c222a 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -121,7 +121,8 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { int TypeID = TypeIds[J]; assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; @@ -195,9 +196,22 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { /// table. Entries must be ordered by try-range address. void EHStreamer:: computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) { + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced so we can put them in the LSDA. + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + // The end label of the previous invoke or nounwind try-range. MCSymbol *LastLabel = nullptr; @@ -208,6 +222,8 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // Whether the last CallSite entry was for an invoke. bool PreviousIsInvoke = false; + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + // Visit all instructions in order of address. for (const auto &MBB : *Asm->MF) { for (const auto &MI : MBB) { @@ -237,7 +253,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. 
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && !IsSJLJ) {
 CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
 CallSites.push_back(Site);
 PreviousIsInvoke = false;
@@ -254,14 +270,14 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 CallSiteEntry Site = {
 BeginLabel,
 LastLabel,
- LandingPad->LandingPadLabel,
+ LandingPad,
 FirstActions[P.PadIndex]
 };
 // Try to merge with the previous call-site. SJLJ doesn't do this.
- if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (PreviousIsInvoke && !IsSJLJ) {
 CallSiteEntry &Prev = CallSites.back();
- if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+ if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) {
 // Extend the range of the previous entry.
 Prev.EndLabel = Site.EndLabel;
 continue;
@@ -269,7 +285,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 }
 // Otherwise, create a new call-site.
- if (Asm->MAI->isExceptionHandlingDwarf())
+ if (!IsSJLJ)
 CallSites.push_back(Site);
 else {
 // SjLj EH must maintain the call sites in the order assigned
@@ -287,7 +303,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 // If some instruction between the previous try-range and the end of the
 // function may throw, create a call-site entry with no landing pad for the
 // region following the try-range.
- if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+ if (SawPotentiallyThrowing && !IsSJLJ) {
 CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
 CallSites.push_back(Site);
 }
@@ -314,7 +330,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
 /// 3. Type ID table contains references to all the C++ typeinfo for all
 /// catches in the function. This table is reverse indexed base 1.
 void EHStreamer::emitExceptionTable() {
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+ const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
 const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
 const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -338,23 +354,9 @@ void EHStreamer::emitExceptionTable() {
 unsigned SizeActions =
 computeActionsTable(LandingPads, Actions, FirstActions);
- // Invokes and nounwind calls have entries in PadMap (due to being bracketed
- // by try-range labels when lowered). Ordinary calls do not, so appropriate
- // try-ranges for them need be deduced when using DWARF exception handling.
- RangeMapType PadMap;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LandingPad = LandingPads[i];
- for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
- MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
- assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
- PadRange P = { i, j };
- PadMap[BeginLabel] = P;
- }
- }
-
 // Compute the call-site table.
 SmallVector<CallSiteEntry, 64> CallSites;
- computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+ computeCallSiteTable(CallSites, LandingPads, FirstActions);
 // Final tallies.
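To make the merge rule in computeCallSiteTable concrete (it applies to Itanium-style tables only; SjLj must keep the call-site numbering assigned during lowering):

// Adjacent try-ranges that share a landing pad and first action collapse:
//   { Begin0, End0, LPadA, Action3 }
//   { End0,   End1, LPadA, Action3 }  // starts where the previous one ended
// become a single record:
//   { Begin0, End1, LPadA, Action3 }
// A different pad or action starts a fresh record. Note the comparison now
// uses the LandingPadInfo pointer (Site.LPad) rather than the pad's label.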
@@ -519,8 +521,7 @@ void EHStreamer::emitExceptionTable() { Asm->EmitULEB128(S.Action); } } else { - // DWARF Exception handling - assert(Asm->MAI->isExceptionHandlingDwarf()); + // Itanium LSDA exception handling // The call-site table is a list of all call sites that may throw an // exception (including C++ 'throw' statements) in the procedure @@ -576,15 +577,15 @@ void EHStreamer::emitExceptionTable() { // Offset of the landing pad, counted in 16-byte bundles relative to the // @LPStart address. - if (!S.PadLabel) { + if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer.AddComment(" has no landing pad"); Asm->OutStreamer.EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" jumps to ") + - S.PadLabel->getName()); - Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4); + S.LPad->LandingPadLabel->getName()); + Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); } // Offset of the first associated action record, relative to the start of @@ -649,7 +650,7 @@ void EHStreamer::emitExceptionTable() { } void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); @@ -662,9 +663,9 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector<const GlobalVariable *>::const_reverse_iterator + for (std::vector<const GlobalValue *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalVariable *GV = *I; + const GlobalValue *GV = *I; if (VerboseAsm) Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); @@ -681,7 +682,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { unsigned TypeID = *I; if (VerboseAsm) { --Entry; - if (TypeID != 0) + if (isFilterEHSelector(TypeID)) Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 2b6ba788e6fa..9b316ff00e9b 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H -#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" @@ -23,6 +23,8 @@ class MachineModuleInfo; class MachineInstr; class MachineFunction; class AsmPrinter; +class MCSymbol; +class MCSymbolRefExpr; template <typename T> class SmallVectorImpl; @@ -60,11 +62,11 @@ protected: /// Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. + MCSymbol *BeginLabel; // Null indicates the start of the function. + MCSymbol *EndLabel; // Null indicates the end of the function. - // The landing pad starts at PadLabel. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. + // LPad contains the landing pad start labels. + const LandingPadInfo *LPad; // Null indicates that there is no landing pad. 
unsigned Action;
};
@@ -86,7 +88,6 @@ protected:
 /// form gaps in the table. Entries must be ordered by try-range address.
 void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const RangeMapType &PadMap,
 const SmallVectorImpl<const LandingPadInfo *> &LPs,
 const SmallVectorImpl<unsigned> &FirstActions);
@@ -113,6 +114,13 @@ protected:
 virtual void emitTypeInfos(unsigned TTypeEncoding);
+ // Helpers for identifying what kind of clause an EH typeid or selector
+ // corresponds to. Negative selectors are for filter clauses, the zero
+ // selector is for cleanups, and positive selectors are for catch clauses.
+ static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+ static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+ static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
 public:
 EHStreamer(AsmPrinter *A);
 virtual ~EHStreamer();
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index bfcbe6b94e7f..e293acd43a88 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
@@ -35,8 +36,8 @@ namespace {
 class ErlangGCPrinter : public GCMetadataPrinter {
 public:
- void beginAssembly(AsmPrinter &AP) override;
- void finishAssembly(AsmPrinter &AP) override;
+ void finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) override;
 };
}
@@ -46,11 +47,11 @@
X("erlang", "erlang-compatible garbage collector");
void llvm::linkErlangGCPrinter() { }
-void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { }
-
-void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) {
+void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
+ AsmPrinter &AP) {
 MCStreamer &OS = AP.OutStreamer;
- unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+ unsigned IntPtrSize =
+ AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize();
 // Put this in a custom .note section.
 AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext()
@@ -58,9 +59,13 @@
 SectionKind::getDataRel()));
 // For each function...
- for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+ for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(),
+ IE = Info.funcinfo_end();
+ FI != IE; ++FI) {
 GCFunctionInfo &MD = **FI;
-
+ if (MD.getStrategy().getName() != getStrategy().getName())
+ // this function is managed by some other GC
+ continue;
 /** A compact GC layout.
Emit this data structure: * * struct { diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 5a9ecd794a8d..ddb14a057363 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; @@ -33,8 +34,10 @@ namespace { class OcamlGCMetadataPrinter : public GCMetadataPrinter { public: - void beginAssembly(AsmPrinter &AP) override; - void finishAssembly(AsmPrinter &AP) override; + void beginAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) override; }; } @@ -66,12 +69,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { AP.OutStreamer.EmitLabel(Sym); } -void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { +void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); - EmitCamlGlobal(getModule(), AP, "code_begin"); + EmitCamlGlobal(M, AP, "code_begin"); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "data_begin"); + EmitCamlGlobal(M, AP, "data_begin"); } /// emitAssembly - Print the frametable. The ocaml frametable format is thus: @@ -90,24 +94,30 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if /// either condition is detected in a function which uses the GC. /// -void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); +void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + unsigned IntPtrSize = + AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); - EmitCamlGlobal(getModule(), AP, "code_end"); + EmitCamlGlobal(M, AP, "code_end"); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "data_end"); + EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? AP.OutStreamer.EmitIntValue(0, IntPtrSize); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); - EmitCamlGlobal(getModule(), AP, "frametable"); + EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; - for (iterator I = begin(), IE = end(); I != IE; ++I) { + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); I != IE; ++I) { GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { NumDescriptors++; } @@ -120,8 +130,12 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { AP.EmitInt16(NumDescriptors); AP.EmitAlignment(IntPtrSize == 4 ? 
2 : 3); - for (iterator I = begin(), IE = end(); I != IE; ++I) { + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); I != IE; ++I) { GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 81285d55d636..2138cb9514f8 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "DwarfException.h" +#include "Win64Exception.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" @@ -99,9 +99,156 @@ void Win64Exception::endFunction(const MachineFunction *) { if (shouldEmitPersonality) { Asm->OutStreamer.PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. Asm->OutStreamer.EmitWinEHHandlerData(); - emitExceptionTable(); + + // Emit either MSVC-compatible tables or the usual Itanium-style LSDA after + // the UNWIND_INFO struct. + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::MSVC) { + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + if (Per->getName() == "__C_specific_handler") + emitCSpecificHandlerTable(); + else + report_fatal_error(Twine("unexpected personality function: ") + + Per->getName()); + } else { + emitExceptionTable(); + } + Asm->OutStreamer.PopSection(); } Asm->OutStreamer.EmitWinCFIEndProc(); } + +const MCSymbolRefExpr *Win64Exception::createImageRel32(const MCSymbol *Value) { + return MCSymbolRefExpr::Create(Value, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + +/// Emit the language-specific data that __C_specific_handler expects. This +/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning +/// up after faults with __try, __except, and __finally. The typeinfo values +/// are not really RTTI data, but pointers to filter functions that return an +/// integer (1, 0, or -1) indicating how to handle the exception. For __finally +/// blocks and other cleanups, the landing pad label is zero, and the filter +/// function is actually a cleanup handler with the same prototype. A catch-all +/// entry is modeled with a null filter function field and a non-zero landing +/// pad label. +/// +/// Possible filter function return values: +/// EXCEPTION_EXECUTE_HANDLER (1): +/// Jump to the landing pad label after cleanups. +/// EXCEPTION_CONTINUE_SEARCH (0): +/// Continue searching this table or continue unwinding. +/// EXCEPTION_CONTINUE_EXECUTION (-1): +/// Resume execution at the trapping PC. +/// +/// Inferred table structure: +/// struct Table { +/// int NumEntries; +/// struct Entry { +/// imagerel32 LabelStart; +/// imagerel32 LabelEnd; +/// imagerel32 FilterOrFinally; // Zero means catch-all. +/// imagerel32 LabelLPad; // Zero means __finally. +/// } Entries[NumEntries]; +/// }; +void Win64Exception::emitCSpecificHandlerTable() { + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + + // Simplifying assumptions for first implementation: + // - Cleanups are not implemented. + // - Filters are not implemented. + + // The Itanium LSDA table sorts similar landing pads together to simplify the + // actions table, but we don't need that. 
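A concrete rendering of one table entry from the structure documented above, for a hypothetical single guarded region (assembly syntax approximate; the names are mine):

// C source:  __try { MayFault(); } __except (MyFilter(...)) { Recover(); }
//
// Emitted table (one entry):
//   .long 1                  // NumEntries
//   .long .Lbegin@IMGREL     // LabelStart
//   .long .Lend@IMGREL + 1   // LabelEnd: ranges are half-open, hence the +1
//   .long MyFilter@IMGREL    // FilterOrFinally; 0 would mean catch-all
//   .long .Llpad@IMGREL      // LabelLPad; 0 would mean __finally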
+ SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + for (const auto &LP : PadInfos) + LandingPads.push_back(&LP); + + // Compute label ranges for call sites as we would for the Itanium LSDA, but + // use an all zero action table because we aren't using these actions. + SmallVector<unsigned, 64> FirstActions; + FirstActions.resize(LandingPads.size()); + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + MCSymbol *EHFuncBeginSym = + Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + MCSymbol *EHFuncEndSym = + Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber()); + + // Emit the number of table entries. + unsigned NumEntries = 0; + for (const CallSiteEntry &CSE : CallSites) { + if (!CSE.LPad) + continue; // Ignore gaps. + for (int Selector : CSE.LPad->TypeIds) { + // Ignore C++ filter clauses in SEH. + // FIXME: Implement cleanup clauses. + if (isCatchEHSelector(Selector)) + ++NumEntries; + } + } + Asm->OutStreamer.EmitIntValue(NumEntries, 4); + + // Emit the four-label records for each call site entry. The table has to be + // sorted in layout order, and the call sites should already be sorted. + for (const CallSiteEntry &CSE : CallSites) { + // Ignore gaps. Unlike the Itanium model, unwinding through a frame without + // an EH table entry will propagate the exception rather than terminating + // the program. + if (!CSE.LPad) + continue; + const LandingPadInfo *LPad = CSE.LPad; + + // Compute the label range. We may reuse the function begin and end labels + // rather than forming new ones. + const MCExpr *Begin = + createImageRel32(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); + const MCExpr *End; + if (CSE.EndLabel) { + // The interval is half-open, so we have to add one to include the return + // address of the last invoke in the range. + End = MCBinaryExpr::CreateAdd(createImageRel32(CSE.EndLabel), + MCConstantExpr::Create(1, Asm->OutContext), + Asm->OutContext); + } else { + End = createImageRel32(EHFuncEndSym); + } + + // These aren't really type info globals, they are actually pointers to + // filter functions ordered by selector. The zero selector is used for + // cleanups, so slot zero corresponds to selector 1. + const std::vector<const GlobalValue *> &SelectorToFilter = MMI->getTypeInfos(); + + // Do a parallel iteration across typeids and clause labels, skipping filter + // clauses. + assert(LPad->TypeIds.size() == LPad->ClauseLabels.size()); + for (size_t I = 0, E = LPad->TypeIds.size(); I < E; ++I) { + // AddLandingPadInfo stores the clauses in reverse, but there is a FIXME + // to change that. + int Selector = LPad->TypeIds[E - I - 1]; + MCSymbol *ClauseLabel = LPad->ClauseLabels[I]; + + // Ignore C++ filter clauses in SEH. + // FIXME: Implement cleanup clauses. + if (!isCatchEHSelector(Selector)) + continue; + + Asm->OutStreamer.EmitValue(Begin, 4); + Asm->OutStreamer.EmitValue(End, 4); + if (isCatchEHSelector(Selector)) { + assert(unsigned(Selector - 1) < SelectorToFilter.size()); + const GlobalValue *TI = SelectorToFilter[Selector - 1]; + if (TI) // Emit the filter function pointer. + Asm->OutStreamer.EmitValue(createImageRel32(Asm->getSymbol(TI)), 4); + else // Otherwise, this is a "catch i8* null", or catch all. 
+ Asm->OutStreamer.EmitIntValue(0, 4); + } + Asm->OutStreamer.EmitValue(createImageRel32(ClauseLabel), 4); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h new file mode 100644 index 000000000000..b2d5d1bce563 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -0,0 +1,56 @@ +//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H + +#include "EHStreamer.h" + +namespace llvm { +class MachineFunction; + +class Win64Exception : public EHStreamer { + /// Per-function flag to indicate if personality info should be emitted. + bool shouldEmitPersonality; + + /// Per-function flag to indicate if the LSDA should be emitted. + bool shouldEmitLSDA; + + /// Per-function flag to indicate if frame moves info should be emitted. + bool shouldEmitMoves; + + void emitCSpecificHandlerTable(); + + const MCSymbolRefExpr *createImageRel32(const MCSymbol *Value); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + Win64Exception(AsmPrinter *A); + virtual ~Win64Exception(); + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; +}; +} + +#endif + diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 6a5c431d4fd1..b5e0929efd91 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -116,15 +116,67 @@ WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) Asm = AP; } +void WinCodeViewLineTables::endModule() { + if (FnDebugInfo.empty()) + return; + + assert(Asm != nullptr); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + // Emit per-function debug information. This code is extracted into a + // separate function for readability. + for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) + emitDebugInfoForFunction(VisitedFunctions[I]); + + // This subsection holds a file index to offset in string table table. 
+ Asm->OutStreamer.AddComment("File index to string table offset subsection"); + Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); + size_t NumFilenames = FileNameRegistry.Infos.size(); + Asm->EmitInt32(8 * NumFilenames); + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + StringRef Filename = FileNameRegistry.Filenames[I]; + // For each unique filename, just write its offset in the string table. + Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); + // The function name offset is not followed by any additional data. + Asm->EmitInt32(0); + } + + // This subsection holds the string table. + Asm->OutStreamer.AddComment("String table"); + Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); + Asm->EmitInt32(FileNameRegistry.LastOffset); + // The payload starts with a null character. + Asm->EmitInt8(0); + + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + // Just emit unique filenames one by one, separated by a null character. + Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); + Asm->EmitInt8(0); + } + + // No more subsections. Fill with zeros to align the end of the section by 4. + Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); + + clear(); +} + static void EmitLabelDiff(MCStreamer &Streamer, - const MCSymbol *From, const MCSymbol *To) { + const MCSymbol *From, const MCSymbol *To, + unsigned int Size = 4) { MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; MCContext &Context = Streamer.getContext(); const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context), *ToRef = MCSymbolRefExpr::Create(To, Variant, Context); const MCExpr *AddrDelta = MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context); - Streamer.EmitValue(AddrDelta, 4); + Streamer.EmitValue(AddrDelta, Size); } void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { @@ -138,6 +190,51 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { return; assert(FI.End && "Don't know where the function ends?"); + StringRef FuncName = getDISubprogram(GV).getDisplayName(), + GVName = GV->getName(); + // FIXME Clang currently sets DisplayName to "bar" for a C++ + // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying + // to demangle display names anyways, so let's just put a mangled name into + // the symbols subsection until Clang gives us what we need. + if (GVName.startswith("\01?")) + FuncName = GVName.substr(1); + // Emit a symbol subsection, required by VS2012+ to find function boundaries. + MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(), + *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName)); + Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION); + EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd); + Asm->OutStreamer.EmitLabel(SymbolsBegin); + { + MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(), + *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); + Asm->OutStreamer.EmitLabel(ProcSegmentBegin); + + Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START); + // Some bytes of this segment don't seem to be required for basic debugging, + // so just fill them with zeroes. 
+ Asm->OutStreamer.EmitFill(12, 0);
+ // This is the important bit that tells the debugger where the function
+ // code is located and what its size is:
+ EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
+ Asm->OutStreamer.EmitFill(12, 0);
+ Asm->OutStreamer.EmitCOFFSecRel32(Fn);
+ Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ Asm->EmitInt8(0);
+ // Emit the function display name as a null-terminated string.
+ Asm->OutStreamer.EmitBytes(FuncName);
+ Asm->EmitInt8(0);
+ Asm->OutStreamer.EmitLabel(ProcSegmentEnd);
+
+ // We're done with this function.
+ Asm->EmitInt16(0x0002);
+ Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
+ }
+ Asm->OutStreamer.EmitLabel(SymbolsEnd);
+ // Every subsection must be aligned to a 4-byte boundary.
+ Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);
+
 // PCs/Instructions are grouped into segments sharing the same filename.
 // Pre-calculate the lengths (in instructions) of these segments and store
 // them in a map for convenience. Each index in the map is the sequential
@@ -154,18 +251,19 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
 }
 FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
- // Emit the control code of the subsection followed by the payload size.
- Asm->OutStreamer.AddComment(
- "Linetable subsection for " + Twine(Fn->getName()));
+ // Emit a line table subsection, required to do PC-to-file:line lookup.
+ Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName));
 Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
- MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(),
- *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol();
- EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd);
- Asm->OutStreamer.EmitLabel(SubsectionBegin);
+ MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(),
+ *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol();
+ EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd);
+ Asm->OutStreamer.EmitLabel(LineTableBegin);
 // Identify the function this subsection is for.
 Asm->OutStreamer.EmitCOFFSecRel32(Fn);
 Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
+ // Insert padding after a 16-bit section index.
+ Asm->EmitInt16(0);
 // Length of the function's code, in bytes.
 EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
@@ -209,56 +307,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
 if (FileSegmentEnd)
 Asm->OutStreamer.EmitLabel(FileSegmentEnd);
- Asm->OutStreamer.EmitLabel(SubsectionEnd);
-}
-
-void WinCodeViewLineTables::endModule() {
- if (FnDebugInfo.empty())
- return;
-
- assert(Asm != nullptr);
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
- Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
-
- // The COFF .debug$S section consists of several subsections, each starting
- // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
- // of the payload followed by the payload itself. The subsections are 4-byte
- // aligned.
-
- for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
- emitDebugInfoForFunction(VisitedFunctions[I]);
-
- // This subsection holds a file index to offset in string table table.
- Asm->OutStreamer.AddComment("File index to string table offset subsection"); - Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); - size_t NumFilenames = FileNameRegistry.Infos.size(); - Asm->EmitInt32(8 * NumFilenames); - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - StringRef Filename = FileNameRegistry.Filenames[I]; - // For each unique filename, just write it's offset in the string table. - Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); - // The function name offset is not followed by any additional data. - Asm->EmitInt32(0); - } - - // This subsection holds the string table. - Asm->OutStreamer.AddComment("String table"); - Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); - Asm->EmitInt32(FileNameRegistry.LastOffset); - // The payload starts with a null character. - Asm->EmitInt8(0); - - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - // Just emit unique filenames one by one, separated by a null character. - Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); - Asm->EmitInt8(0); - } - - // No more subsections. Fill with zeros to align the end of the section by 4. - Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); - - clear(); + Asm->OutStreamer.EmitLabel(LineTableEnd); } void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 0734d97ba714..8492eacb7ff7 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ -#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp deleted file mode 100644 index 421946ded40b..000000000000 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ /dev/null @@ -1,380 +0,0 @@ -//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-atomic-expand" - -namespace { - class AtomicExpandLoadLinked : public FunctionPass { - const TargetMachine *TM; - public: - static char ID; // Pass identification, replacement for typeid - explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { - initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool expandAtomicLoad(LoadInst *LI); - bool expandAtomicStore(StoreInst *LI); - bool expandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - }; -} - -char AtomicExpandLoadLinked::ID = 0; -char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; -INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", - "Expand Atomic calls in terms of load-linked & store-conditional", - false, false) - -FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { - return new AtomicExpandLoadLinked(TM); -} - -bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) - return false; - - SmallVector<Instruction *, 1> AtomicInsts; - - // Changing control-flow while iterating through it is a bad idea, so gather a - // list of all atomic instructions before we start. - for (BasicBlock &BB : F) - for (Instruction &Inst : BB) { - if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || - (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || - (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) - AtomicInsts.push_back(&Inst); - } - - bool MadeChange = false; - for (Instruction *Inst : AtomicInsts) { - if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) - continue; - - if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) - MadeChange |= expandAtomicRMW(AI); - else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) - MadeChange |= expandAtomicCmpXchg(CI); - else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - MadeChange |= expandAtomicLoad(LI); - else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - MadeChange |= expandAtomicStore(SI); - else - llvm_unreachable("Unknown atomic instruction"); - } - - return MadeChange; -} - -bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { - // Load instructions don't actually need a leading fence, even in the - // SequentiallyConsistent case. - AtomicOrdering MemOpOrder = - TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic - : LI->getOrdering(); - - // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is - // an ldrexd (A3.5.3). 
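The same single-copy-atomicity constraint is what a C++ toolchain faces: on 32-bit ARM a 64-bit std::atomic load cannot be a plain load pair and is typically emitted as an ldrexd-based sequence, much like the expansion this deleted pass performed. A small illustrative sketch (nothing here is from the patch):

#include <atomic>

// On 32-bit ARM this load generally lowers to an ldrexd-based sequence,
// since an ordinary 64-bit load pair is not single-copy atomic.
long long load64(const std::atomic<long long> &A) {
  return A.load(std::memory_order_acquire);
}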
- IRBuilder<> Builder(LI); - Value *Val = TM->getTargetLowering()->emitLoadLinked( - Builder, LI->getPointerOperand(), MemOpOrder); - - insertTrailingFence(Builder, LI->getOrdering()); - - LI->replaceAllUsesWith(Val); - LI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. - IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); - SI->eraseFromParent(); - - // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); -} - -bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // fence? - // atomicrmw.start: - // %loaded = @load.linked(%addr) - // %new = some_op iN %loaded, %incr - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %atomicrmw.end - // atomicrmw.end: - // fence? - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. 
- Builder.SetInsertPoint(LoopBB); - Value *Loaded = - TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); - - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } - - Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( - Builder, NewVal, Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - insertTrailingFence(Builder, Order); - - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); - AtomicOrdering FailureOrder = CI->getFailureOrdering(); - Value *Addr = CI->getPointerOperand(); - BasicBlock *BB = CI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord - // - // The full expansion we produce is: - // [...] - // fence? - // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure - // cmpxchg.trystore: - // %stored = @store_conditional(%new, %addr) - // %success = icmp eq i32 %stored, 0 - // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure - // cmpxchg.success: - // fence? - // br label %cmpxchg.end - // cmpxchg.failure: - // fence? - // br label %cmpxchg.end - // cmpxchg.end: - // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] - // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 - // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 - // [...] 
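The diagram above is the IR analogue of building a strong compare-and-swap out of a primitive that may fail spuriously. The same shape in portable C++, as a hedged sketch (strongCAS is a hypothetical helper; compare_exchange_weak stands in for the store-conditional):

#include <atomic>

// Illustrative only: a strong CAS built from a weak one.
template <typename T>
bool strongCAS(std::atomic<T> &A, T &Expected, T Desired) {
  for (;;) {                                    // cmpxchg.start
    T Loaded = Expected;
    if (A.compare_exchange_weak(Loaded, Desired))
      return true;                              // cmpxchg.success
    if (Loaded != Expected) {                   // the value really differed
      Expected = Loaded;
      return false;                             // cmpxchg.failure
    }
    // Spurious store-conditional failure: loop again, matching the strong
    // cmpxchg branch from the trystore block back to the loop header.
  }
}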
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); - - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = - TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - - // If the the cmpxchg doesn't actually need any ordering when it fails, we can - // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); - - Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); - StoreSuccess = Builder.CreateICmpEQ( - StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : LoopBB); - - // Make sure later instructions don't get reordered with a fence if necessary. - Builder.SetInsertPoint(SuccessBB); - insertTrailingFence(Builder, SuccessOrder); - Builder.CreateBr(ExitBB); - - Builder.SetInsertPoint(FailureBB); - insertTrailingFence(Builder, FailureOrder); - Builder.CreateBr(ExitBB); - - // Finally, we have control-flow based knowledge of whether the cmpxchg - // succeeded or not. We expose this to later passes by converting any - // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. - - // Setup the builder so we can create any PHIs we need. - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); - Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); - Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - - // Look for any users of the cmpxchg that are just comparing the loaded value - // against the desired one, and replace them with the CFG-derived version. - SmallVector<ExtractValueInst *, 2> PrunedInsts; - for (auto User : CI->users()) { - ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); - if (!EV) - continue; - - assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && - "weird extraction from { iN, i1 }"); - - if (EV->getIndices()[0] == 0) - EV->replaceAllUsesWith(Loaded); - else - EV->replaceAllUsesWith(Success); - - PrunedInsts.push_back(EV); - } - - // We can remove the instructions now we're no longer iterating through them. - for (auto EV : PrunedInsts) - EV->eraseFromParent(); - - if (!CI->use_empty()) { - // Some use of the full struct return that we don't understand has happened, - // so we've got to reconstruct it properly. 
- Value *Res; - Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); - Res = Builder.CreateInsertValue(Res, Success, 1); - - CI->replaceAllUsesWith(Res); - } - - CI->eraseFromParent(); - return true; -} - -AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getTargetLowering()->getInsertFencesForAtomic()) - return Ord; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - Builder.CreateFence(Release); - - // The exclusive operations don't need any barrier if we're adding separate - // fences. - return Monotonic; -} - -void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getTargetLowering()->getInsertFencesForAtomic()) - return; - - if (Ord == Acquire || Ord == AcquireRelease) - Builder.CreateFence(Acquire); - else if (Ord == SequentiallyConsistent) - Builder.CreateFence(SequentiallyConsistent); -} diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp new file mode 100644 index 000000000000..12f6bd77d334 --- /dev/null +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -0,0 +1,563 @@ +//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "atomic-expand" + +namespace { + class AtomicExpand: public FunctionPass { + const TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpand(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad); + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicLoadToLL(LoadInst *LI); + bool expandAtomicLoadToCmpXchg(LoadInst *LI); + bool expandAtomicStore(StoreInst *SI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); + bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + bool isIdempotentRMW(AtomicRMWInst *AI); + bool simplifyIdempotentRMW(AtomicRMWInst *AI); + }; +} + +char AtomicExpand::ID = 0; +char &llvm::AtomicExpandID = AtomicExpand::ID; +INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", + "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", + false, false) + +FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { + return new AtomicExpand(TM); +} + +bool AtomicExpand::runOnFunction(Function &F) { + if (!TM || 
!TM->getSubtargetImpl()->enableAtomicExpand()) + return false; + auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->isAtomic()) + AtomicInsts.push_back(&*I); + } + + bool MadeChange = false; + for (auto I : AtomicInsts) { + auto LI = dyn_cast<LoadInst>(I); + auto SI = dyn_cast<StoreInst>(I); + auto RMWI = dyn_cast<AtomicRMWInst>(I); + auto CASI = dyn_cast<AtomicCmpXchgInst>(I); + assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) && + "Unknown atomic instruction"); + + auto FenceOrdering = Monotonic; + bool IsStore, IsLoad; + if (TargetLowering->getInsertFencesForAtomic()) { + if (LI && isAtLeastAcquire(LI->getOrdering())) { + FenceOrdering = LI->getOrdering(); + LI->setOrdering(Monotonic); + IsStore = false; + IsLoad = true; + } else if (SI && isAtLeastRelease(SI->getOrdering())) { + FenceOrdering = SI->getOrdering(); + SI->setOrdering(Monotonic); + IsStore = true; + IsLoad = false; + } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || + isAtLeastAcquire(RMWI->getOrdering()))) { + FenceOrdering = RMWI->getOrdering(); + RMWI->setOrdering(Monotonic); + IsStore = IsLoad = true; + } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { + // If a compare and swap is lowered to LL/SC, we can do smarter fence + // insertion, with a stronger one on the success path than on the + // failure path. As a result, fence insertion is directly done by + // expandAtomicCmpXchg in that case. + FenceOrdering = CASI->getSuccessOrdering(); + CASI->setSuccessOrdering(Monotonic); + CASI->setFailureOrdering(Monotonic); + IsStore = IsLoad = true; + } + + if (FenceOrdering != Monotonic) { + MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + } + } + + if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { + MadeChange |= expandAtomicLoad(LI); + } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { + MadeChange |= expandAtomicStore(SI); + } else if (RMWI) { + // There are two different ways of expanding RMW instructions: + // - into a load if it is idempotent + // - into a Cmpxchg/LL-SC loop otherwise + // we try them in that order. + MadeChange |= (isIdempotentRMW(RMWI) && + simplifyIdempotentRMW(RMWI)) || + (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && + expandAtomicRMW(RMWI)); + } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { + MadeChange |= expandAtomicCmpXchg(CASI); + } + } + return MadeChange; +} + +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad) { + IRBuilder<> Builder(I); + + auto LeadingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( + Builder, Order, IsStore, IsLoad); + + auto TrailingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( + Builder, Order, IsStore, IsLoad); + // The trailing fence is emitted before the instruction instead of after + // because there is no easy way of setting Builder insertion point after + // an instruction. So we must erase it from the BB, and insert it back + // in the right place. + // We have a guard here because not every atomic operation generates a + // trailing fence. 
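At the source level, this bracketing corresponds to dropping the access to relaxed ("monotonic") ordering and recovering the semantics with explicit fences; loads, for instance, get only a trailing fence, which is one reason not every operation produces both. A standalone sketch under that acquire/release assumption (helper names invented):

#include <atomic>

// Leading fence + relaxed store approximates a release store.
void storeReleaseViaFences(std::atomic<int> &A, int V) {
  std::atomic_thread_fence(std::memory_order_release); // leading fence
  A.store(V, std::memory_order_relaxed);               // ordering dropped
}

// Relaxed load + trailing fence approximates an acquire load; no leading
// fence is needed for a load.
int loadAcquireViaFences(const std::atomic<int> &A) {
  int V = A.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire); // trailing fence
  return V;
}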
+ if (TrailingFence) { + TrailingFence->removeFromParent(); + TrailingFence->insertAfter(I); + } + + return (LeadingFence || TrailingFence); +} + +bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { + if (TM->getSubtargetImpl() + ->getTargetLowering() + ->hasLoadLinkedStoreConditional()) + return expandAtomicLoadToLL(LI); + else + return expandAtomicLoadToCmpXchg(LI); +} + +bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + IRBuilder<> Builder(LI); + + // On some architectures, load-linked instructions are atomic for larger + // sizes than normal loads. For example, the only 64-bit load guaranteed + // to be single-copy atomic by ARM is an ldrexd (A3.5.3). + Value *Val = + TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { + IRBuilder<> Builder(LI); + AtomicOrdering Order = LI->getOrdering(); + Value *Addr = LI->getPointerOperand(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + Constant *DummyVal = Constant::getNullValue(Ty); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, DummyVal, DummyVal, Order, + AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); + Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); + + LI->replaceAllUsesWith(Loaded); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicStore(StoreInst *SI) { + // This function is only called on atomic stores that are too large to be + // atomic if implemented as a native store. So we replace them by an + // atomic swap, that can be implemented for example as a ldrex/strex on ARM + // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. + // It is the responsibility of the target to only return true in + // shouldExpandAtomicRMW in cases where this is required and possible. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { + if (TM->getSubtargetImpl() + ->getTargetLowering() + ->hasLoadLinkedStoreConditional()) + return expandAtomicRMWToLLSC(AI); + else + return expandAtomicRMWToCmpXchg(AI); +} + +/// Emit IR to implement the given atomicrmw operation on values in registers, +/// returning the new value. 
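The helper defined next maps each RMW opcode onto plain IR arithmetic; the min/max variants, which have no single IR instruction, become a compare feeding a select. The equivalent source-level shape, as an illustrative standalone sketch (names are made up):

#include <cstdint>

// Signed max: icmp sgt feeding a select.
int64_t maxOp(int64_t Loaded, int64_t Inc) {
  bool Cmp = Loaded > Inc;   // icmp sgt
  return Cmp ? Loaded : Inc; // select
}

// Unsigned min: icmp ule feeding a select.
uint64_t uminOp(uint64_t Loaded, uint64_t Inc) {
  bool Cmp = Loaded <= Inc;  // icmp ule
  return Cmp ? Loaded : Inc; // select
}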
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, + Value *Loaded, Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + AtomicOrdering MemOpOrder = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? 
Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + Loaded->addIncoming(NewLoaded, LoopBB); + + Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + // If getInsertFencesForAtomic() returns true, then the target does not want + // to deal with memory orders, and emitLeading/TrailingFence should take care + // of everything. Otherwise, emitLeading/TrailingFence are no-op and we + // should preserve the ordering. + AtomicOrdering MemOpOrder = + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.failure + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? 
+ // br label %cmpxchg.end
+ // cmpxchg.failure:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.end:
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+ // This grabs the DebugLoc from CI
+ IRBuilder<> Builder(CI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we might want a fence too. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore =
+ Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+
+ Builder.SetInsertPoint(TryStoreBB);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ StoreSuccess = Builder.CreateICmpEQ(
+ StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : LoopBB);
+
+ // Make sure later instructions don't get reordered with a fence if necessary.
+ Builder.SetInsertPoint(SuccessBB);
+ TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ Builder.SetInsertPoint(FailureBB);
+ TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+
+ // Set up the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+ // We can remove the instructions now that we're no longer iterating through them.
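The loop built by expandAtomicRMWToCmpXchg earlier in this file has the same shape as implementing a fetch-style operation with a CAS retry loop in portable C++. A hedged sketch (fetchOr is a hypothetical helper; on failure, compare_exchange_weak refreshes Loaded just as the PHI picks up %new_loaded):

#include <atomic>

// Illustrative only: atomic fetch-or via a compare-exchange retry loop.
unsigned fetchOr(std::atomic<unsigned> &A, unsigned Mask) {
  unsigned Loaded = A.load(std::memory_order_relaxed); // %init_loaded
  while (!A.compare_exchange_weak(Loaded, Loaded | Mask)) {
    // Another thread intervened; Loaded now holds the fresh value, and the
    // desired value is recomputed on the next iteration.
  }
  return Loaded; // the value observed before our update, like %loaded
}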
+ for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. + Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { + auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); + if(!C) + return false; + + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + switch(Op) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + return C->isZero(); + case AtomicRMWInst::And: + return C->isMinusOne(); + // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... + default: + return false; + } +} + +bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + + if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { + if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) + expandAtomicLoad(ResultingLoad); + return true; + } + + return false; +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index b2737bf754f9..72da80646c5b 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -42,7 +42,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// Estimate the cost overhead of SK_Alternate shuffle. unsigned getAltShuffleOverhead(Type *Ty) const; - const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + const TargetLoweringBase *getTLI() const { + return TM->getSubtargetImpl()->getTargetLowering(); + } public: BasicTTI() : ImmutablePass(ID), TM(nullptr) { @@ -90,7 +92,7 @@ public: unsigned getJumpBufSize() const override; bool shouldBuildLookupTables() const override; bool haveFastSqrt(Type *Ty) const override; - void getUnrollingPreferences(Loop *L, + void getUnrollingPreferences(const Function *F, Loop *L, UnrollingPreferences &UP) const override; /// @} @@ -99,10 +101,11 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector) const override; - unsigned getMaximumUnrollFactor() const override; + unsigned getMaxInterleaveFactor() const override; unsigned getRegisterBitWidth(bool Vector) const override; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const override; + OperandValueKind, OperandValueProperties, + OperandValueProperties) const override; unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const override; unsigned getCastInstrCost(unsigned Opcode, Type *Dst, @@ -186,9 +189,8 @@ unsigned BasicTTI::getJumpBufSize() const { bool BasicTTI::shouldBuildLookupTables() const { const TargetLoweringBase *TLI = getTLI(); - return TLI->supportJumpTables() && - (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); + return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } bool BasicTTI::haveFastSqrt(Type *Ty) const { @@ -197,7 +199,7 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const { return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } -void BasicTTI::getUnrollingPreferences(Loop *L, +void 
BasicTTI::getUnrollingPreferences(const Function *F, Loop *L, UnrollingPreferences &UP) const { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -223,11 +225,11 @@ void BasicTTI::getUnrollingPreferences(Loop *L, // until someone finds a case where it matters in practice. unsigned MaxOps; - const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(F); if (PartialUnrollingThreshold.getNumOccurrences() > 0) MaxOps = PartialUnrollingThreshold; - else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0) - MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize; + else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) + MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; else return; @@ -282,13 +284,14 @@ unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { return 32; } -unsigned BasicTTI::getMaximumUnrollFactor() const { +unsigned BasicTTI::getMaxInterleaveFactor() const { return 1; } unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, - OperandValueKind) const { + OperandValueKind, OperandValueKind, + OperandValueProperties, + OperandValueProperties) const { // Check if any of the operands are vector operands. const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -465,7 +468,8 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); - if (!TLI->isOperationExpand(ISD, LT.second)) { + if (!(ValTy->isVectorTy() && !LT.second.isVector()) && + !TLI->isOperationExpand(ISD, LT.second)) { // The operation is legal. Assume it costs 1. Multiply // by the type-legalization overhead. return LT.first * 1; @@ -515,7 +519,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, if (Opcode == Instruction::Store) LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); } if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { @@ -561,6 +565,8 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::minnum: ISD = ISD::FMINNUM; break; + case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break; case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; @@ -572,6 +578,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; + // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return 0; @@ -582,7 +589,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. 
if (LT.first > 1) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 7503e5751994..2128da1b8aac 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -32,8 +34,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -70,6 +72,8 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -91,22 +95,24 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true); - return Folder.OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, + getAnalysis<MachineBlockFrequencyInfo>(), + getAnalysis<MachineBranchProbabilityInfo>()); + return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), + MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } - -BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &FreqInfo, + const MachineBranchProbabilityInfo &ProbInfo) + : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), + MBPI(ProbInfo) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case cl::BOU_FALSE: EnableTailMerge = false; break; } - - EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the @@ -388,10 +394,8 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, RS->enterBasicBlock(CurMBB); if (!CurMBB->empty()) RS->forward(std::prev(CurMBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) + for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++) + if (RS->isRegUsed(i, false)) NewMBB->addLiveIn(i); } } @@ -435,6 +439,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + // NewMBB inherits CurMBB's block frequency. + MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); + // For targets that use the register scavenger, we must maintain LiveIns. 
MaintainLiveIns(&CurMBB, NewMBB);
@@ -504,6 +511,21 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
#endif
}
+BlockFrequency
+BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const {
+ auto I = MergedBBFreq.find(MBB);
+
+ if (I != MergedBBFreq.end())
+ return I->second;
+
+ return MBFI.getBlockFreq(MBB);
+}
+
+void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
+ BlockFrequency F) {
+ MergedBBFreq[MBB] = F;
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -806,6 +828,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ // Recompute common tail MBB's edge weights and block frequency.
+ setCommonTailEdgeWeights(*MBB);
+
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
@@ -890,7 +916,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// Visit each predecessor only once.
- if (!UniquePreds.insert(PBB))
+ if (!UniquePreds.insert(PBB).second)
continue;
// Skip blocks which may jump to a landing pad. Can't tail merge these.
@@ -968,6 +994,44 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
return MadeChange;
}
+void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) {
+ SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size());
+ BlockFrequency AccumulatedMBBFreq;
+
+ // Aggregate edge frequency of successor edge j:
+ // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)),
+ // where bb is a basic block that is in SameTails.
+ for (const auto &Src : SameTails) {
+ const MachineBasicBlock *SrcMBB = Src.getBlock();
+ BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB);
+ AccumulatedMBBFreq += BlockFreq;
+
+ // It is not necessary to recompute edge weights if TailBB has fewer than two
+ // successors.
+ if (TailMBB.succ_size() <= 1) + continue; + + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI); + } + + MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq); + + if (TailMBB.succ_size() <= 1) + return; + + auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); + uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); +} + //===----------------------------------------------------------------------===// // Branch Optimization //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 0d15ed7e792a..3653a2ccd623 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -7,14 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP -#define LLVM_CODEGEN_BRANCHFOLDING_HPP +#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H +#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/BlockFrequency.h" #include <vector> namespace llvm { + class MachineBlockFrequencyInfo; + class MachineBranchProbabilityInfo; class MachineFunction; class MachineModuleInfo; class RegScavenger; @@ -23,7 +26,9 @@ namespace llvm { class BranchFolder { public: - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist); + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &MBFI, + const MachineBranchProbabilityInfo &MBPI); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, @@ -92,9 +97,26 @@ namespace llvm { MachineModuleInfo *MMI; RegScavenger *RS; + /// \brief This class keeps track of branch frequencies of newly created + /// blocks and tail-merged blocks. 
+ class MBFIWrapper { + public: + MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + + private: + const MachineBlockFrequencyInfo &MBFI; + DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq; + }; + + MBFIWrapper MBBFreqInfo; + const MachineBranchProbabilityInfo &MBPI; + bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); + void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); void MaintainLiveIns(MachineBasicBlock *CurMBB, MachineBasicBlock *NewMBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index b71b30cc1cca..48bf3545e733 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,7 +2,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp - AtomicExpandLoadLinkedPass.cpp + AtomicExpandPass.cpp BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp @@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + ForwardControlFlowIntegrity.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -27,7 +28,6 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp - JITCodeEmitter.cpp JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp @@ -48,7 +48,7 @@ add_llvm_library(LLVMCodeGen MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCSE.cpp - MachineCodeEmitter.cpp + MachineCombiner.cpp MachineCopyPropagation.cpp MachineDominators.cpp MachineDominanceFrontier.cpp @@ -98,13 +98,13 @@ add_llvm_library(LLVMCodeGen SjLjEHPrepare.cpp SlotIndexes.cpp SpillPlacement.cpp - Spiller.cpp SplitKit.cpp StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp StackMapLivenessAnalysis.cpp StackMaps.cpp + StatepointExampleGC.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index bc033f96bcf2..d08fae09323c 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -16,8 +16,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "calcspillweights" @@ -95,11 +95,12 @@ static bool isRematerializable(const LiveInterval &LI, void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); - const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); MachineBasicBlock *mbb = nullptr; MachineLoop *loop = nullptr; bool isExiting = false; float totalWeight = 0; + unsigned numInstr = 0; // Number of instructions using li SmallPtrSet<MachineInstr*, 8> visited; // Find the best physreg hint and the best virtreg hint. 
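The numInstr counter introduced in this hunk feeds the spill-weight normalization at the end of the function. As a rough, hypothetical illustration of why it helps (a toy formula, not the one LLVM actually uses): a density-style weight lets an interval that spans a large range but is touched by almost no instructions rank as a cheap spill candidate.

#include <cstdint>

// Toy spill-weight normalization; the shape is assumed for illustration.
static float normalizeToy(float TotalWeight, unsigned Size, unsigned NumInstr) {
  if (NumInstr == 0)
    return 0.0f; // no instruction uses the register: spilling costs nothing
  return TotalWeight / static_cast<float>(Size ? Size : 1);
}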
@@ -116,9 +117,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { MachineInstr *mi = &*(I++); + numInstr++; if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; - if (!visited.insert(mi)) + if (!visited.insert(mi).second) continue; float weight = 1.0f; @@ -186,8 +188,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) + if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalize(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize(), numInstr); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index add861a83632..034ffb34b9cc 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -14,21 +14,22 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVectorImpl<CCValAssign> &locs, - LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), - CallOrPrologue(Unknown) { + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), + TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; @@ -50,7 +51,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, if (MinAlign > (int)Align) Align = MinAlign; MF.getFrameInfo()->ensureMaxAlignment(Align); - TM.getTargetLowering()->HandleByVal(this, Size, Align); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + Size = unsigned(RoundUpToAlignment(Size, MinAlign)); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -178,3 +180,70 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { llvm_unreachable(nullptr); } } + +static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { + if (VT.isVector()) + return true; // Assume -msse-regparm might be in effect. + if (!VT.isInteger()) + return false; + if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall) + return true; + return false; +} + +void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, + MVT VT, CCAssignFn Fn) { + unsigned SavedStackOffset = StackOffset; + unsigned NumLocs = Locs.size(); + + // Set the 'inreg' flag if it is used for this calling convention. + ISD::ArgFlagsTy Flags; + if (isValueTypeInRegForCC(CallingConv, VT)) + Flags.setInReg(); + + // Allocate something of this value type repeatedly until we get assigned a + // location in memory. 
+ bool HaveRegParm = true;
+ while (HaveRegParm) {
+ if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
+#ifndef NDEBUG
+ dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ << " while computing remaining regparms\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ HaveRegParm = Locs.back().isRegLoc();
+ }
+
+ // Copy all the registers from the value locations we added.
+ assert(NumLocs < Locs.size() && "CC assignment failed to add location");
+ for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I)
+ if (Locs[I].isRegLoc())
+ Regs.push_back(MCPhysReg(Locs[I].getLocReg()));
+
+ // Clear the assigned values and stack memory. We leave the registers marked
+ // as allocated so that future queries don't return the same registers, i.e.
+ // when i64 and f64 are both passed in GPRs.
+ StackOffset = SavedStackOffset;
+ Locs.resize(NumLocs);
+}
+
+void CCState::analyzeMustTailForwardedRegisters(
+ SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes,
+ CCAssignFn Fn) {
+ // Oftentimes calling conventions will not use register parameters for
+ // variadic functions, so we need to assume we're not variadic so that we get
+ // all the registers that might be used in a non-variadic call.
+ SaveAndRestore<bool> SavedVarArg(IsVarArg, false);
+
+ for (MVT RegVT : RegParmTypes) {
+ SmallVector<MCPhysReg, 8> RemainingRegs;
+ getRemainingRegParmsForType(RemainingRegs, RegVT, Fn);
+ const TargetLowering *TL = MF.getSubtarget().getTargetLowering();
+ const TargetRegisterClass *RC = TL->getRegClassFor(RegVT);
+ for (MCPhysReg PReg : RemainingRegs) {
+ unsigned VReg = MF.addLiveIn(PReg, RC);
+ Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT));
+ }
+ }
+}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index b3beac3932bf..307dec548fc5 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
-  initializeAtomicExpandLoadLinkedPass(Registry);
+  initializeAtomicExpandPass(Registry);
   initializeBasicTTIPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeCodeGenPreparePass(Registry);
@@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMachineBlockPlacementPass(Registry);
   initializeMachineBlockPlacementStatsPass(Registry);
   initializeMachineCopyPropagationPass(Registry);
+  initializeMachineCombinerPass(Registry);
   initializeMachineCSEPass(Registry);
   initializeMachineDominatorTreePass(Registry);
   initializeMachinePostDominatorTreePass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index d5039b2e8517..82cd3801f583 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -29,6 +30,7 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/IR/ValueMap.h"
@@ -43,6 +45,7 @@
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -63,6 +66,7 @@ STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
 STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
 STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
 STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
+STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
 
 static cl::opt<bool> DisableBranchOpts(
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
@@ -80,15 +84,40 @@ static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));
 
+static cl::opt<bool> DisableStoreExtract(
+    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
+    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> StressStoreExtract(
+    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
+    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
+
+static cl::opt<bool> DisableExtLdPromotion(
+    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
+             "CodeGenPrepare"));
+
+static cl::opt<bool> StressExtLdPromotion(
+    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
+    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
+             "optimization in CodeGenPrepare"));
+
 namespace {
 typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
-typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
+struct TypeIsSExt {
+  Type *Ty;
+  bool IsSExt;
+  TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {}
+};
+typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+class TypePromotionTransaction;
 
   class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
const TargetMachine *TM; const TargetLowering *TLI; + const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; DominatorTree *DT; @@ -118,7 +147,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy; public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr) { + : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -128,6 +157,7 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetTransformInfo>(); } private: @@ -135,18 +165,24 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy; bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); - bool OptimizeBlock(BasicBlock &BB); - bool OptimizeInst(Instruction *I); + bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool OptimizeInst(Instruction *I, bool& ModifiedDT); bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy); bool OptimizeInlineAsmInst(CallInst *CS); - bool OptimizeCallInst(CallInst *CI); - bool MoveExtToFormExtLoad(Instruction *I); + bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool MoveExtToFormExtLoad(Instruction *&I); bool OptimizeExtUses(Instruction *I); bool OptimizeSelectInst(SelectInst *SI); bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool OptimizeExtractElementInst(Instruction *Inst); bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); bool sinkAndCmp(Function &F); + bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + Instruction *&Inst, + const SmallVectorImpl<Instruction *> &Exts, + unsigned CreatedInst); + bool splitBranchCondition(Function &F); }; } @@ -168,8 +204,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PromotedInsts.clear(); ModifiedDT = false; - if (TM) TLI = TM->getTargetLowering(); + if (TM) + TLI = TM->getSubtargetImpl()->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfo>(); + TTI = &getAnalysis<TargetTransformInfo>(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; @@ -198,15 +236,23 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // into a single target instruction, push the mask and compare into branch // users. Do this before OptimizeBlock -> OptimizeInst -> // OptimizeCmpExpression, which perturbs the pattern being searched for. 
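  // A hedged sketch of what the newly added splitBranchCondition aims for
  // (labels and values are illustrative only): a branch on a combined
  // condition such as
  //
  //   %or.cond = or i1 %cmp1, %cmp2
  //   br i1 %or.cond, label %if.then, label %if.end
  //
  // is split into two branches so that each compare can fold into its own
  // conditional branch:
  //
  //   br i1 %cmp1, label %if.then, label %check2
  // check2:
  //   br i1 %cmp2, label %if.then, label %if.end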
- if (!DisableBranchOpts) + if (!DisableBranchOpts) { EverMadeChange |= sinkAndCmp(F); + EverMadeChange |= splitBranchCondition(F); + } bool MadeChange = true; while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end(); ) { BasicBlock *BB = I++; - MadeChange |= OptimizeBlock(*BB); + bool ModifiedDTOnIteration = false; + MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + ModifiedDT |= ModifiedDTOnIteration; + if (ModifiedDTOnIteration) + break; } EverMadeChange |= MadeChange; } @@ -216,9 +262,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (!DisableBranchOpts) { MadeChange = false; SmallPtrSet<BasicBlock*, 8> WorkList; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); - MadeChange |= ConstantFoldTerminator(BB, true); + for (BasicBlock &BB : F) { + SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); + MadeChange |= ConstantFoldTerminator(&BB, true); if (!MadeChange) continue; for (SmallVectorImpl<BasicBlock*>::iterator @@ -662,10 +708,13 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, if (!ISDOpcode) continue; - // If the use is actually a legal node, there will not be an implicit - // truncate. - if (TLI.isOperationLegalOrCustom(ISDOpcode, - EVT::getEVT(TruncUser->getType()))) + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(TruncUser->getType(), true))) continue; // Don't bother for PHI nodes. 
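  // A hedged illustration of the pattern this shift/truncate sinking serves
  // (block and value names are only an example): given
  //
  //   bb1:
  //     %shr = lshr i64 %x, 32
  //   bb2:
  //     %tr = trunc i64 %shr to i32
  //
  // moving the shift next to the truncate puts both in one block, so the
  // target can select them together as a single bit-field extract.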
@@ -799,23 +848,211 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
   return MadeChange;
 }
 
-namespace {
-class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-protected:
-  void replaceCall(Value *With) override {
-    CI->replaceAllUsesWith(With);
-    CI->eraseFromParent();
+// ScalarizeMaskedLoad() translates the masked load intrinsic, like
+//  <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+//                                 <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks that load the elements one-by-one if
+// the appropriate mask bit is set
+//
+//  %1 = bitcast i8* %addr to i32*
+//  %2 = extractelement <16 x i1> %mask, i32 0
+//  %3 = icmp eq i1 %2, true
+//  br i1 %3, label %cond.load, label %else
+//
+//cond.load:                                        ; preds = %0
+//  %4 = getelementptr i32* %1, i32 0
+//  %5 = load i32* %4
+//  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+//  br label %else
+//
+//else:                                             ; preds = %0, %cond.load
+//  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+//  %7 = extractelement <16 x i1> %mask, i32 1
+//  %8 = icmp eq i1 %7, true
+//  br i1 %8, label %cond.load1, label %else2
+//
+//cond.load1:                                       ; preds = %else
+//  %9 = getelementptr i32* %1, i32 1
+//  %10 = load i32* %9
+//  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+//  br label %else2
+//
+//else2:                                            ; preds = %else, %cond.load1
+//  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+//  %12 = extractelement <16 x i1> %mask, i32 2
+//  %13 = icmp eq i1 %12, true
+//  br i1 %13, label %cond.load4, label %else5
+//
+static void ScalarizeMaskedLoad(CallInst *CI) {
+  Value *Ptr  = CI->getArgOperand(0);
+  Value *Src0 = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(2);
+  VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+  assert(VecType && "Unexpected return type of masked load intrinsic");
+  Type *EltTy = VecType->getElementType();
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  BasicBlock *CondBlock = nullptr;
+  BasicBlock *PrevIfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  // Bitcast %addr from i8* to EltTy*
+  Type *NewPtrType =
+      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+  Value *UndefVal = UndefValue::get(VecType);
+
+  // The result vector
+  Value *VResult = UndefVal;
+
+  PHINode *Phi = nullptr;
+  Value *PrevPhi = UndefVal;
+
+  unsigned VectorWidth = VecType->getNumElements();
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  %to_load = icmp eq i1 %mask_1, true
+    //  br i1 %to_load, label %cond.load, label %else
+    //
+    if (Idx > 0) {
+      Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+      Phi->addIncoming(VResult, CondBlock);
+      Phi->addIncoming(PrevPhi, PrevIfBlock);
+      PrevPhi = Phi;
+      VResult = Phi;
+    }
+
+    Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+                                    ConstantInt::get(Predicate->getType(), 1));
+
+    // Create "cond" block
+    //
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  %Elt = load i32* %EltAddr
+    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+    //
+    CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+    Builder.SetInsertPoint(InsertPt);
+
+    Value *Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+    LoadInst *Load = Builder.CreateLoad(Gep, false);
+    VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    OldBr->eraseFromParent();
+    PrevIfBlock = IfBlock;
+    IfBlock = NewIfBlock;
   }
-  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const override {
-    if (ConstantInt *SizeCI =
-        dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
-      return SizeCI->isAllOnesValue();
-    return false;
+
+  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+  Phi->addIncoming(VResult, CondBlock);
+  Phi->addIncoming(PrevPhi, PrevIfBlock);
+  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+  CI->replaceAllUsesWith(NewI);
+  CI->eraseFromParent();
+}
+
+// ScalarizeMaskedStore() translates the masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+//                         <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one-by-one if
+// the appropriate mask bit is set
+//
+//  %1 = bitcast i8* %addr to i32*
+//  %2 = extractelement <16 x i1> %mask, i32 0
+//  %3 = icmp eq i1 %2, true
+//  br i1 %3, label %cond.store, label %else
+//
+// cond.store:                                       ; preds = %0
+//  %4 = extractelement <16 x i32> %val, i32 0
+//  %5 = getelementptr i32* %1, i32 0
+//  store i32 %4, i32* %5
+//  br label %else
+//
+// else:                                             ; preds = %0, %cond.store
+//  %6 = extractelement <16 x i1> %mask, i32 1
+//  %7 = icmp eq i1 %6, true
+//  br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1:                                      ; preds = %else
+//  %8 = extractelement <16 x i32> %val, i32 1
+//  %9 = getelementptr i32* %1, i32 1
+//  store i32 %8, i32* %9
+//  br label %else2
+//   . . .
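// A hedged aside: these expansions are only used as a fallback. The dispatch
// added further below in OptimizeCallInst consults the target first, and
// boils down to:
//
//   case Intrinsic::masked_store:
//     if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) {
//       ScalarizeMaskedStore(CI); // emit the block chain sketched above
//       ModifiedDT = true;        // the CFG changed; restart the block scan
//     }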
+static void ScalarizeMaskedStore(CallInst *CI) {
+  Value *Ptr  = CI->getArgOperand(1);
+  Value *Src  = CI->getArgOperand(0);
+  Value *Mask = CI->getArgOperand(3);
+
+  VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+  assert(VecType && "Unexpected data type in masked store intrinsic");
+  Type *EltTy = VecType->getElementType();
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  // Bitcast %addr from i8* to EltTy*
+  Type *NewPtrType =
+      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+
+  unsigned VectorWidth = VecType->getNumElements();
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  %to_store = icmp eq i1 %mask_1, true
+    //  br i1 %to_store, label %cond.store, label %else
+    //
+    Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+                                    ConstantInt::get(Predicate->getType(), 1));
+
+    // Create "cond" block
+    //
+    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  store i32 %OneElt, i32* %EltAddr
+    //
+    BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+    Builder.SetInsertPoint(InsertPt);
+
+    Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+    Value *Gep = Builder.CreateInBoundsGEP(FirstEltPtr, Builder.getInt32(Idx));
+    Builder.CreateStore(OneElt, Gep);
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    OldBr->eraseFromParent();
+    IfBlock = NewIfBlock;
   }
-};
-} // end anonymous namespace
+  CI->eraseFromParent();
+}
 
-bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
   // Lower inline assembly if we can.
@@ -835,38 +1072,60 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
     return true;
   }
 
-  // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
-  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
-    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
-    Type *ReturnTy = CI->getType();
-    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
-
-    // Substituting this can cause recursive simplifications, which can
-    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
-    // happens.
-    WeakVH IterHandle(CurInstIterator);
-
-    replaceAndRecursivelySimplify(CI, RetVal,
-                                  TLI ? TLI->getDataLayout() : nullptr,
-                                  TLInfo, ModifiedDT ? nullptr : DT);
+  if (II) {
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::objectsize: {
+      // Lower all uses of llvm.objectsize.*
+      bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
+      Type *ReturnTy = CI->getType();
+      Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+
+      // Substituting this can cause recursive simplifications, which can
+      // invalidate our iterator.  Use a WeakVH to hold onto it in case this
+      // happens.
+ WeakVH IterHandle(CurInstIterator); + + replaceAndRecursivelySimplify(CI, RetVal, + TLI ? TLI->getDataLayout() : nullptr, + TLInfo, ModifiedDT ? nullptr : DT); - // If the iterator instruction was recursively deleted, start over at the - // start of the block. - if (IterHandle != CurInstIterator) { - CurInstIterator = BB->begin(); - SunkAddrs.clear(); + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + if (IterHandle != CurInstIterator) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + return true; + } + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) { + ScalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) { + ScalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } } - return true; - } - if (II && TLI) { - SmallVector<Value*, 2> PtrOps; - Type *AccessTy; - if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) - while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) - return true; + if (TLI) { + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + return true; + } } // From here on out we're working with named functions. @@ -878,10 +1137,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // Lower all default uses of _chk calls. This is very similar // to what InstCombineCalls does, but here we are only lowering calls - // that have the default "don't know" as the objectsize. Anything else - // should be left alone. - CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD, TLInfo); + // to fortified library functions (e.g. __memcpy_chk) that have the default + // "don't know" as the objectsize. Anything else should be left alone. + FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true); + if (Value *V = Simplifier.optimizeCall(CI)) { + CI->replaceAllUsesWith(V); + CI->eraseFromParent(); + return true; + } + return false; } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -978,7 +1242,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { } else { SmallPtrSet<BasicBlock*, 4> VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - if (!VisitedBBs.insert(*PI)) + if (!VisitedBBs.insert(*PI).second) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); @@ -1250,46 +1514,75 @@ class TypePromotionTransaction { /// \brief Build a truncate instruction. class TruncBuilder : public TypePromotionAction { + Value *Val; public: /// \brief Build a truncate instruction of \p Opnd producing a \p Ty /// result. /// trunc Opnd to Ty. TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { IRBuilder<> Builder(Opnd); - Inst = cast<Instruction>(Builder.CreateTrunc(Opnd, Ty, "promoted")); - DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); + Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); } - /// \brief Get the built instruction. - Instruction *getBuiltInstruction() { return Inst; } + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } /// \brief Remove the built instruction. 
void undo() override { - DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); - Inst->eraseFromParent(); + DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); } }; /// \brief Build a sign extension instruction. class SExtBuilder : public TypePromotionAction { + Value *Val; public: /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty /// result. /// sext Opnd to Ty. SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) - : TypePromotionAction(Inst) { + : TypePromotionAction(InsertPt) { + IRBuilder<> Builder(InsertPt); + Val = Builder.CreateSExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); + } + + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); + } + }; + + /// \brief Build a zero extension instruction. + class ZExtBuilder : public TypePromotionAction { + Value *Val; + public: + /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty + /// result. + /// zext Opnd to Ty. + ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(InsertPt) { IRBuilder<> Builder(InsertPt); - Inst = cast<Instruction>(Builder.CreateSExt(Opnd, Ty, "promoted")); - DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); + Val = Builder.CreateZExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); } - /// \brief Get the built instruction. - Instruction *getBuiltInstruction() { return Inst; } + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } /// \brief Remove the built instruction. void undo() override { - DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); - Inst->eraseFromParent(); + DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); } }; @@ -1418,9 +1711,11 @@ public: /// Same as Value::mutateType. void mutateType(Instruction *Inst, Type *NewTy); /// Same as IRBuilder::createTrunc. - Instruction *createTrunc(Instruction *Opnd, Type *Ty); + Value *createTrunc(Instruction *Opnd, Type *Ty); /// Same as IRBuilder::createSExt. - Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as IRBuilder::createZExt. + Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); /// Same as Instruction::moveBefore. 
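  /// A minimal usage sketch (hedged; variable names are illustrative) of the
  /// transaction API declared here, mirroring how the callers further down
  /// drive it:
  ///
  ///   TypePromotionTransaction TPT;
  ///   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
  ///       TPT.getRestorationPoint();
  ///   Value *Promoted = TPT.createZExt(Ext, Opnd, Ext->getType());
  ///   if (NotProfitable)
  ///     TPT.rollback(LastKnownGood); // undo every recorded action
  ///   else
  ///     TPT.commit();                // make the changes permanent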
  void moveBefore(Instruction *Inst, Instruction *Before);
  /// @}
 
@@ -1452,20 +1747,28 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
   Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
 }
 
-Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
-                                                   Type *Ty) {
+Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
+                                             Type *Ty) {
   std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
-  Instruction *I = Ptr->getBuiltInstruction();
+  Value *Val = Ptr->getBuiltValue();
   Actions.push_back(std::move(Ptr));
-  return I;
+  return Val;
 }
 
-Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
-                                                  Value *Opnd, Type *Ty) {
+Value *TypePromotionTransaction::createSExt(Instruction *Inst,
+                                            Value *Opnd, Type *Ty) {
   std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
-  Instruction *I = Ptr->getBuiltInstruction();
+  Value *Val = Ptr->getBuiltValue();
+  Actions.push_back(std::move(Ptr));
+  return Val;
+}
+
+Value *TypePromotionTransaction::createZExt(Instruction *Inst,
+                                            Value *Opnd, Type *Ty) {
+  std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
+  Value *Val = Ptr->getBuiltValue();
   Actions.push_back(std::move(Ptr));
-  return I;
+  return Val;
 }
 
 void TypePromotionTransaction::moveBefore(Instruction *Inst,
@@ -1656,60 +1959,107 @@ static bool MightBeFoldableInst(Instruction *I) {
   }
 }
 
+/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
+/// \note \p Val is assumed to be the product of some type promotion.
+/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
+/// to be legal, as the non-promoted value would have had the same state.
+static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) {
+  Instruction *PromotedInst = dyn_cast<Instruction>(Val);
+  if (!PromotedInst)
+    return false;
+  int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
+  // If the ISDOpcode is undefined, it was undefined before the promotion.
+  if (!ISDOpcode)
+    return true;
+  // Otherwise, check if the promoted instruction is legal or not.
+  return TLI.isOperationLegalOrCustom(
+      ISDOpcode, TLI.getValueType(PromotedInst->getType()));
+}
+
 /// \brief Helper class to perform type promotion.
 class TypePromotionHelper {
-  /// \brief Utility function to check whether or not a sign extension of
-  /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either
-  /// using the operands of \p Inst or promoting \p Inst.
+  /// \brief Utility function to check whether or not a sign or zero extension
+  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
+  /// either using the operands of \p Inst or promoting \p Inst.
+  /// The type of the extension is defined by \p IsSExt.
   /// In other words, check if:
-  /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType.
+  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
   /// #1 Promotion applies:
-  /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...).
+  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
   /// #2 Operand reuses:
-  /// sext opnd1 to ConsideredSExtType.
+  /// ext opnd1 to ConsideredExtType.
   /// \p PromotedInsts maps the instructions to their type before promotion.
- static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType, - const InstrToOrigTy &PromotedInsts); + static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, + const InstrToOrigTy &PromotedInsts, bool IsSExt); /// \brief Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. - static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { + static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { if (isa<SelectInst>(Inst) && OpIdx == 0) return false; return true; } - /// \brief Utility function to promote the operand of \p SExt when this - /// operand is a promotable trunc or sext. + /// \brief Utility function to promote the operand of \p Ext when this + /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been - /// created to promote the operand of SExt. + /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. - /// \return The promoted value which is used instead of SExt. - static Value *promoteOperandForTruncAndSExt(Instruction *SExt, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); - - /// \brief Utility function to promote the operand of \p SExt when this + /// \return The promoted value which is used instead of Ext. + static Value *promoteOperandForTruncAndAnyExt( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs); + + /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been - /// created to promote the operand of SExt. + /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. /// Should never be called directly. - /// \return The promoted value which is used instead of SExt. - static Value *promoteOperandForOther(Instruction *SExt, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); + /// \return The promoted value which is used instead of Ext. + static Value * + promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, bool IsSExt); + + /// \see promoteOperandForOther. + static Value * + signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts, + Truncs, true); + } + + /// \see promoteOperandForOther. 
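  // A hedged example of what the promoteOperandFor* helpers around here
  // achieve; the trunc/ext collapse handled by promoteOperandForTruncAndAnyExt
  // turns
  //
  //   %z = zext i8 %a to i16
  //   %s = sext i16 %z to i32   ; sext of a zext: the high bit is known zero
  //
  // into the single, cheaper
  //
  //   %z2 = zext i8 %a to i32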
+  static Value *
+  zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
+                            InstrToOrigTy &PromotedInsts,
+                            unsigned &CreatedInsts,
+                            SmallVectorImpl<Instruction *> *Exts,
+                            SmallVectorImpl<Instruction *> *Truncs) {
+    return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
+                                  Truncs, false);
+  }
 
 public:
-  /// Type for the utility function that promotes the operand of SExt.
-  typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT,
-                           InstrToOrigTy &PromotedInsts,
-                           unsigned &CreatedInsts);
-  /// \brief Given a sign extend instruction \p SExt, return the approriate
-  /// action to promote the operand of \p SExt instead of using SExt.
+  /// Type for the utility function that promotes the operand of Ext.
+  typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
+                           InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+                           SmallVectorImpl<Instruction *> *Exts,
+                           SmallVectorImpl<Instruction *> *Truncs);
+  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
+  /// action to promote the operand of \p Ext instead of using Ext.
   /// \return NULL if no promotable action is possible with the current
   /// sign extension.
   /// \p InsertedTruncs keeps track of all the truncate instructions inserted by
@@ -1717,36 +2067,49 @@ public:
   /// because we do not want to promote these instructions as CodeGenPrepare
   /// will reinsert them later. Thus creating an infinite loop: create/remove.
   /// \p PromotedInsts maps the instructions to their type before promotion.
-  static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs,
                           const TargetLowering &TLI,
                           const InstrToOrigTy &PromotedInsts);
 };
 
 bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
-                                        Type *ConsideredSExtType,
-                                        const InstrToOrigTy &PromotedInsts) {
-  // We can always get through sext.
-  if (isa<SExtInst>(Inst))
+                                        Type *ConsideredExtType,
+                                        const InstrToOrigTy &PromotedInsts,
+                                        bool IsSExt) {
+  // The promotion helper does not know how to deal with vector types yet.
+  // To be able to fix that, we would need to fix the places where we
+  // statically extend, e.g., constants and such.
+  if (Inst->getType()->isVectorTy())
+    return false;
+
+  // We can always get through zext.
+  if (isa<ZExtInst>(Inst))
+    return true;
+
+  // sext(sext) is ok too.
+  if (IsSExt && isa<SExtInst>(Inst))
     return true;
 
   // We can get through binary operator, if it is legal. In other words, the
   // binary operator must have a nuw or nsw flag.
   const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
   if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
-      (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+      ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+       (IsSExt && BinOp->hasNoSignedWrap())))
     return true;
 
   // Check if we can do the following simplification.
-  // sext(trunc(sext)) --> sext
+  // ext(trunc(opnd)) --> ext(opnd)
   if (!isa<TruncInst>(Inst))
     return false;
 
   Value *OpndVal = Inst->getOperand(0);
-  // Check if we can use this operand in the sext.
-  // If the type is larger than the result type of the sign extension,
+  // Check if we can use this operand in the extension.
+  // If the type is larger than the result type of the extension,
   // we cannot.
-  if (OpndVal->getType()->getIntegerBitWidth() >
-      ConsideredSExtType->getIntegerBitWidth())
+  if (!OpndVal->getType()->isIntegerTy() ||
+      OpndVal->getType()->getIntegerBitWidth() >
+          ConsideredExtType->getIntegerBitWidth())
     return false;
 
   // If the operand of the truncate is not an instruction, we will not have
@@ -1757,18 +2120,19 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
     return false;
 
   // Check if the source of the type is narrow enough.
-  // I.e., check that trunc just drops sign extended bits.
-  // #1 get the type of the operand.
+  // I.e., check that trunc just drops extended bits of the same kind as
+  // the extension.
+  // #1 get the type of the operand and check the kind of the extended bits.
   const Type *OpndType;
   InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
-  if (It != PromotedInsts.end())
-    OpndType = It->second;
-  else if (isa<SExtInst>(Opnd))
-    OpndType = cast<Instruction>(Opnd)->getOperand(0)->getType();
+  if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt)
+    OpndType = It->second.Ty;
+  else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
+    OpndType = Opnd->getOperand(0)->getType();
   else
     return false;
 
-  // #2 check that the truncate just drop sign extended bits.
+  // #2 check that the truncate just drops extended bits.
   if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth())
     return true;
 
@@ -1776,149 +2140,181 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
 }
 
 TypePromotionHelper::Action TypePromotionHelper::getAction(
-    Instruction *SExt, const SetOfInstrs &InsertedTruncs,
+    Instruction *Ext, const SetOfInstrs &InsertedTruncs,
     const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
-  Instruction *SExtOpnd = dyn_cast<Instruction>(SExt->getOperand(0));
-  Type *SExtTy = SExt->getType();
-  // If the operand of the sign extension is not an instruction, we cannot
+  assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
+         "Unexpected instruction type");
+  Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
+  Type *ExtTy = Ext->getType();
+  bool IsSExt = isa<SExtInst>(Ext);
+  // If the operand of the extension is not an instruction, we cannot
  // get through.
  // If it is, check that we can get through.
-  if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
+  if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
     return nullptr;
 
   // Do not promote if the operand has been added by codegenprepare.
   // Otherwise, it means we are undoing an optimization that is likely to be
   // redone, thus causing potential infinite loop.
-  if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
+  if (isa<TruncInst>(ExtOpnd) && InsertedTruncs.count(ExtOpnd))
     return nullptr;
 
   // SExt or Trunc instructions.
   // Return the related handler.
-  if (isa<SExtInst>(SExtOpnd) || isa<TruncInst>(SExtOpnd))
-    return promoteOperandForTruncAndSExt;
+  if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
+      isa<ZExtInst>(ExtOpnd))
+    return promoteOperandForTruncAndAnyExt;
 
   // Regular instruction.
   // Abort early if we will have to insert non-free instructions.
-  if (!SExtOpnd->hasOneUse() &&
-      !TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
+  if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
     return nullptr;
-  return promoteOperandForOther;
+  return IsSExt ?
signExtendOperandForOther : zeroExtendOperandForOther; } -Value *TypePromotionHelper::promoteOperandForTruncAndSExt( +Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); - // Replace sext(trunc(opnd)) or sext(sext(opnd)) - // => sext(opnd). - TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); + Value *ExtVal = SExt; + if (isa<ZExtInst>(SExtOpnd)) { + // Replace s|zext(zext(opnd)) + // => zext(opnd). + Value *ZExt = + TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); + TPT.replaceAllUsesWith(SExt, ZExt); + TPT.eraseInstruction(SExt); + ExtVal = ZExt; + } else { + // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) + // => z|sext(opnd). + TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); + } CreatedInsts = 0; // Remove dead code. if (SExtOpnd->use_empty()) TPT.eraseInstruction(SExtOpnd); - // Check if the sext is still needed. - if (SExt->getType() != SExt->getOperand(0)->getType()) - return SExt; + // Check if the extension is still needed. + Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); + if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { + if (ExtInst && Exts) + Exts->push_back(ExtInst); + return ExtVal; + } - // At this point we have: sext ty opnd to ty. - // Reassign the uses of SExt to the opnd and remove SExt. - Value *NextVal = SExt->getOperand(0); - TPT.eraseInstruction(SExt, NextVal); + // At this point we have: ext ty opnd to ty. + // Reassign the uses of ExtInst to the opnd and remove ExtInst. + Value *NextVal = ExtInst->getOperand(0); + TPT.eraseInstruction(ExtInst, NextVal); return NextVal; } -Value * -TypePromotionHelper::promoteOperandForOther(Instruction *SExt, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts) { - // By construction, the operand of SExt is an instruction. Otherwise we cannot +Value *TypePromotionHelper::promoteOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) { + // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. - Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); + Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); CreatedInsts = 0; - if (!SExtOpnd->hasOneUse()) { - // SExtOpnd will be promoted. - // All its uses, but SExt, will need to use a truncated value of the + if (!ExtOpnd->hasOneUse()) { + // ExtOpnd will be promoted. + // All its uses, but Ext, will need to use a truncated value of the // promoted version. // Create the truncate now. - Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType()); - Trunc->removeFromParent(); - // Insert it just after the definition. - Trunc->insertAfter(SExtOpnd); + Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); + if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { + ITrunc->removeFromParent(); + // Insert it just after the definition. 
+      ITrunc->insertAfter(ExtOpnd);
+      if (Truncs)
+        Truncs->push_back(ITrunc);
+    }
 
-    TPT.replaceAllUsesWith(SExtOpnd, Trunc);
-    // Restore the operand of SExt (which has been replace by the previous call
+    TPT.replaceAllUsesWith(ExtOpnd, Trunc);
+    // Restore the operand of Ext (which has been replaced by the previous call
     // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
-    TPT.setOperand(SExt, 0, SExtOpnd);
+    TPT.setOperand(Ext, 0, ExtOpnd);
   }
 
   // Get through the Instruction:
   // 1. Update its type.
-  // 2. Replace the uses of SExt by Inst.
-  // 3. Sign extend each operand that needs to be sign extended.
+  // 2. Replace the uses of Ext by Inst.
+  // 3. Extend each operand that needs to be extended.
 
   // Remember the original type of the instruction before promotion.
   // This is useful to know that the high bits are sign extended bits.
-  PromotedInsts.insert(
-      std::pair<Instruction *, Type *>(SExtOpnd, SExtOpnd->getType()));
+  PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
+      ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
   // Step #1.
-  TPT.mutateType(SExtOpnd, SExt->getType());
+  TPT.mutateType(ExtOpnd, Ext->getType());
   // Step #2.
-  TPT.replaceAllUsesWith(SExt, SExtOpnd);
+  TPT.replaceAllUsesWith(Ext, ExtOpnd);
   // Step #3.
-  Instruction *SExtForOpnd = SExt;
+  Instruction *ExtForOpnd = Ext;
 
-  DEBUG(dbgs() << "Propagate SExt to operands\n");
-  for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
+  DEBUG(dbgs() << "Propagate Ext to operands\n");
+  for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
        ++OpIdx) {
-    DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n');
-    if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() ||
-        !shouldSExtOperand(SExtOpnd, OpIdx)) {
+    DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
+    if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
+        !shouldExtOperand(ExtOpnd, OpIdx)) {
      DEBUG(dbgs() << "No need to propagate\n");
      continue;
    }
-    // Check if we can statically sign extend the operand.
-    Value *Opnd = SExtOpnd->getOperand(OpIdx);
+    // Check if we can statically extend the operand.
+    Value *Opnd = ExtOpnd->getOperand(OpIdx);
    if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
-      DEBUG(dbgs() << "Statically sign extend\n");
-      TPT.setOperand(
-          SExtOpnd, OpIdx,
-          ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue()));
+      DEBUG(dbgs() << "Statically extend\n");
+      unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
+      APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
+                            : Cst->getValue().zext(BitWidth);
+      TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
      continue;
    }
-    // UndefValue are typed, so we have to statically sign extend them.
+    // UndefValues are typed, so we have to statically extend them.
    if (isa<UndefValue>(Opnd)) {
-      DEBUG(dbgs() << "Statically sign extend\n");
-      TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType()));
+      DEBUG(dbgs() << "Statically extend\n");
+      TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
      continue;
    }
 
-    // Otherwise we have to explicity sign extend the operand.
-    // Check if SExt was reused to sign extend an operand.
-    if (!SExtForOpnd) {
+    // Otherwise we have to explicitly extend the operand.
+    // Check if Ext was reused to extend an operand.
+    if (!ExtForOpnd) {
      // If yes, create a new one.
-      DEBUG(dbgs() << "More operands to sext\n");
-      SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType());
+      DEBUG(dbgs() << "More operands to ext\n");
+      Value *ValForExtOpnd = IsSExt ?
TPT.createSExt(Ext, Opnd, Ext->getType()) + : TPT.createZExt(Ext, Opnd, Ext->getType()); + if (!isa<Instruction>(ValForExtOpnd)) { + TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); + continue; + } + ExtForOpnd = cast<Instruction>(ValForExtOpnd); ++CreatedInsts; } - - TPT.setOperand(SExtForOpnd, 0, Opnd); + if (Exts) + Exts->push_back(ExtForOpnd); + TPT.setOperand(ExtForOpnd, 0, Opnd); // Move the sign extension before the insertion point. - TPT.moveBefore(SExtForOpnd, SExtOpnd); - TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); + TPT.moveBefore(ExtForOpnd, ExtOpnd); + TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); // If more sext are required, new instructions will have to be created. - SExtForOpnd = nullptr; + ExtForOpnd = nullptr; } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - TPT.eraseInstruction(SExt); + if (ExtForOpnd == Ext) { + DEBUG(dbgs() << "Extension is useless now\n"); + TPT.eraseInstruction(Ext); } - return SExtOpnd; + return ExtOpnd; } /// IsPromotionProfitable - Check whether or not promoting an instruction @@ -1943,16 +2339,7 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, // The promotion is neutral but it may help folding the sign extension in // loads for instance. // Check that we did not create an illegal instruction. - Instruction *PromotedInst = dyn_cast<Instruction>(PromotedOperand); - if (!PromotedInst) - return false; - int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); - // If the ISDOpcode is undefined, it was undefined before the promotion. - if (!ISDOpcode) - return true; - // Otherwise, check if the promoted instruction is legal or not. - return TLI.isOperationLegalOrCustom(ISDOpcode, - EVT::getEVT(PromotedInst->getType())); + return isPromotedInstructionLegal(TLI, PromotedOperand); } /// MatchOperationAddr - Given an instruction or constant expr, see if we can @@ -2130,31 +2517,33 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return true; } - case Instruction::SExt: { - Instruction *SExt = dyn_cast<Instruction>(AddrInst); - if (!SExt) + case Instruction::SExt: + case Instruction::ZExt: { + Instruction *Ext = dyn_cast<Instruction>(AddrInst); + if (!Ext) return false; - // Try to move this sext out of the way of the addressing mode. + // Try to move this ext out of the way of the addressing mode. // Ask for a method for doing so. - TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( - SExt, InsertedTruncs, TLI, PromotedInsts); + TypePromotionHelper::Action TPH = + TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts); if (!TPH) return false; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); unsigned CreatedInsts = 0; - Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); + Value *PromotedOperand = + TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. 
// E.g., // op = add opnd, 1 - // idx = sext op + // idx = ext op // addr = gep base, idx // is now: - // promotedOpnd = sext opnd <- no match here + // promotedOpnd = ext opnd <- no match here // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) // addr = gep base, op <- match if (MovedAway) @@ -2293,10 +2682,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// Add the ultimately found memory instructions to MemoryUses. static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSet<Instruction*, 16> &ConsideredInsts, + SmallPtrSetImpl<Instruction*> &ConsideredInsts, const TargetLowering &TLI) { // If we already considered this instruction, we're done. - if (!ConsideredInsts.insert(I)) + if (!ConsideredInsts.insert(I).second) return false; // If this is an obviously unfoldable instruction, bail out. @@ -2510,7 +2899,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, worklist.pop_back(); // Break use-def graph loops. - if (!Visited.insert(V)) { + if (!Visited.insert(V).second) { Consensus = nullptr; break; } @@ -2700,8 +3089,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (ResultIndex) { - // We need to add this separately from the scale above to help with - // SDAG consecutive load/store merging. + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); @@ -2844,26 +3233,186 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { return MadeChange; } +/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or +/// sign extensions. +static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { + assert(!Inst->use_empty() && "Input must have at least one use"); + const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); + bool IsSExt = isa<SExtInst>(FirstUser); + Type *ExtTy = FirstUser->getType(); + for (const User *U : Inst->users()) { + const Instruction *UI = cast<Instruction>(U); + if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) + return false; + Type *CurTy = UI->getType(); + // Same input and output types: Same instruction after CSE. + if (CurTy == ExtTy) + continue; + + // If IsSExt is true, we are in this situation: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty1 a to ty3 + // Assuming ty2 is shorter than ty3, this could be turned into: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty2 b to ty3 + // However, the last sext is not free. + if (IsSExt) + return false; + + // This is a ZExt, maybe this is free to extend from one type to another. + // In that case, we would not account for a different use. + Type *NarrowTy; + Type *LargeTy; + if (ExtTy->getScalarType()->getIntegerBitWidth() > + CurTy->getScalarType()->getIntegerBitWidth()) { + NarrowTy = CurTy; + LargeTy = ExtTy; + } else { + NarrowTy = ExtTy; + LargeTy = CurTy; + } + + if (!TLI.isZExtFree(NarrowTy, LargeTy)) + return false; + } + // All uses are the same or can be derived from one another for free. + return true; +} + +/// \brief Try to form ExtLd by promoting \p Exts until they reach a +/// load instruction. 
+/// If an ext(load) can be formed, it is returned via \p LI for the load
+/// and \p Inst for the extension.
+/// Otherwise LI == nullptr and Inst == nullptr.
+/// When some promotions happened, \p TPT contains the proper state to
+/// revert them.
+///
+/// \return true when promoting was necessary to expose the ext(load)
+/// opportunity, false otherwise.
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
                                    LoadInst *&LI, Instruction *&Inst,
                                    const SmallVectorImpl<Instruction *> &Exts,
                                    unsigned CreatedInsts = 0) {
+  // Iterate over all the extensions to see if one forms an ext(load).
+  for (auto I : Exts) {
+    // Check if we directly have ext(load).
+    if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+      Inst = I;
+      // No promotion happened here.
+      return false;
+    }
+    // Check whether or not we want to do any promotion.
+    if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+      continue;
+    // Get the action to perform the promotion.
+    TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+        I, InsertedTruncsSet, *TLI, PromotedInsts);
+    // Check if we can promote.
+    if (!TPH)
+      continue;
+    // Save the current state.
+    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+        TPT.getRestorationPoint();
+    SmallVector<Instruction *, 4> NewExts;
+    unsigned NewCreatedInsts = 0;
+    // Promote.
+    Value *PromotedVal =
+        TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+    assert(PromotedVal &&
+           "TypePromotionHelper should have filtered out those cases");
+
+    // We can merge only one extension into a load.
+    // Therefore, if we have more than 1 new extension we heuristically
+    // cut this search path, because it means we degrade the code quality.
+    // With exactly 2, the transformation is neutral, because we will merge
+    // one extension but leave one. However, we optimistically keep going,
+    // because the new extension may be removed too.
+    unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+    if (!StressExtLdPromotion &&
+        (TotalCreatedInsts > 1 ||
+         !isPromotedInstructionLegal(*TLI, PromotedVal))) {
+      // The promotion is not profitable, rollback to the previous state.
+      TPT.rollback(LastKnownGood);
+      continue;
+    }
+    // The promotion is profitable.
+    // Check if it exposes an ext(load).
+    (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
+    if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+               // If we have created a new extension, i.e., now we have two
+               // extensions, we must make sure one of them is merged with
+               // the load, otherwise we may degrade the code quality.
+               (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+      // Promotion happened.
+      return true;
+    // If this does not help to expose an ext(load), roll back.
+    TPT.rollback(LastKnownGood);
+  }
+  // None of the extensions can form an ext(load).
+  LI = nullptr;
+  Inst = nullptr;
+  return false;
+}
+
 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
 /// basic block as the load, unless conditions are unfavorable. This allows
 /// SelectionDAG to fold the extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
 ///
-bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
+bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) {
+  // Try to promote a chain of computation if it allows us to form
+  // an extended load.
+  TypePromotionTransaction TPT;
+  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+      TPT.getRestorationPoint();
+  SmallVector<Instruction *, 1> Exts;
+  Exts.push_back(I);
   // Look for a load being extended.
-  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
-  if (!LI) return false;
+  LoadInst *LI = nullptr;
+  Instruction *OldExt = I;
+  bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts);
+  if (!LI || !I) {
+    assert(!HasPromoted && !LI && "If we did not match any load instruction "
+                                  "the code must remain the same");
+    I = OldExt;
+    return false;
+  }
 
   // If they're already in the same block, there's nothing to do.
-  if (LI->getParent() == I->getParent())
+  // Make the cheap checks first if we did not promote.
+  // If we promoted, we need to check if it is indeed profitable.
+  if (!HasPromoted && LI->getParent() == I->getParent())
     return false;
 
+  EVT VT = TLI->getValueType(I->getType());
+  EVT LoadVT = TLI->getValueType(LI->getType());
+
   // If the load has other users and the truncate is not free, this probably
   // isn't worthwhile.
-  if (!LI->hasOneUse() &&
-      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
-              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
-      !TLI->isTruncateFree(I->getType(), LI->getType()))
+  if (!LI->hasOneUse() && TLI &&
+      (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+      !TLI->isTruncateFree(I->getType(), LI->getType())) {
+    I = OldExt;
+    TPT.rollback(LastKnownGood);
     return false;
+  }
 
   // Check whether the target supports casts folded into loads.
   unsigned LType;
@@ -2873,11 +3422,15 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
     assert(isa<SExtInst>(I) && "Unexpected ext type!");
     LType = ISD::SEXTLOAD;
   }
-  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
+  if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+    I = OldExt;
+    TPT.rollback(LastKnownGood);
     return false;
+  }
 
   // Move the extend into the same block as the load, so that SelectionDAG
   // can fold it.
+  TPT.commit();
   I->removeFromParent();
   I->insertAfter(LI);
   ++NumExtsMoved;
@@ -3109,7 +3662,511 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
   return MadeChange;
 }
 
-bool CodeGenPrepare::OptimizeInst(Instruction *I) {
+namespace {
+/// \brief Helper class to promote a scalar operation to a vector one.
+/// This class is used to move the extractelement transition downward.
+/// E.g.,
+/// a = vector_op <2 x i32>
+/// b = extractelement <2 x i32> a, i32 0
+/// c = scalar_op b
+/// store c
+///
+/// =>
+/// a = vector_op <2 x i32>
+/// c = vector_op a (equivalent to scalar_op on the related lane)
+/// * d = extractelement <2 x i32> c, i32 0
+/// * store d
+/// Assuming both extractelement and store can be combined, we get rid of the
+/// transition.
+class VectorPromoteHelper {
+  /// Used to perform some checks on the legality of vector operations.
+  const TargetLowering &TLI;
+
+  /// Used to estimate the cost of the promoted chain.
+  const TargetTransformInfo &TTI;
+
+  /// The transition being moved downwards.
+  Instruction *Transition;
+  /// The sequence of instructions to be promoted.
+  SmallVector<Instruction *, 4> InstsToBePromoted;
+  /// Cost of combining a store and an extract.
+  unsigned StoreExtractCombineCost;
+  /// Instruction that will be combined with the transition.
+  Instruction *CombineInst;
+
+  /// \brief The instruction that represents the current end of the transition.
+  /// Since we are faking the promotion until we reach the end of the chain
+  /// of computation, we need a way to get the current end of the transition.
+  Instruction *getEndOfTransition() const {
+    if (InstsToBePromoted.empty())
+      return Transition;
+    return InstsToBePromoted.back();
+  }
+
+  /// \brief Return the index of the original value in the transition.
+  /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
+  /// c, is at index 0.
+  unsigned getTransitionOriginalValueIdx() const {
+    assert(isa<ExtractElementInst>(Transition) &&
+           "Other kind of transitions are not supported yet");
+    return 0;
+  }
+
+  /// \brief Return the index of the index in the transition.
+  /// E.g., for "extractelement <2 x i32> c, i32 0" the index
+  /// is at index 1.
+  unsigned getTransitionIdx() const {
+    assert(isa<ExtractElementInst>(Transition) &&
+           "Other kind of transitions are not supported yet");
+    return 1;
+  }
+
+  /// \brief Get the type of the transition.
+  /// This is the type of the original value.
+  /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
+  /// transition is <2 x i32>.
+  Type *getTransitionType() const {
+    return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
+  }
+
+  /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
+  /// I.e., we have the following sequence:
+  /// Def = Transition <ty1> a to <ty2>
+  /// b = ToBePromoted <ty2> Def, ...
+  /// =>
+  /// b = ToBePromoted <ty1> a, ...
+  /// Def = Transition <ty1> ToBePromoted to <ty2>
+  void promoteImpl(Instruction *ToBePromoted);
+
+  /// \brief Check whether or not it is profitable to promote all the
+  /// instructions enqueued to be promoted.
+  bool isProfitableToPromote() {
+    Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
+    unsigned Index = isa<ConstantInt>(ValIdx)
+                         ? cast<ConstantInt>(ValIdx)->getZExtValue()
+                         : -1;
+    Type *PromotedType = getTransitionType();
+
+    StoreInst *ST = cast<StoreInst>(CombineInst);
+    unsigned AS = ST->getPointerAddressSpace();
+    unsigned Align = ST->getAlignment();
+    // Check if this store is supported.
+    if (!TLI.allowsMisalignedMemoryAccesses(
+            TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) {
+      // If this is not supported, there is no way we can combine
+      // the extract with the store.
+      return false;
+    }
+
+    // The scalar chain of computation has to pay for the transition
+    // scalar to vector.
+    // The vector chain has to account for the combining cost.
+    uint64_t ScalarCost =
+        TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
+    uint64_t VectorCost = StoreExtractCombineCost;
+    for (const auto &Inst : InstsToBePromoted) {
+      // Compute the cost.
+      // By construction, all instructions being promoted are arithmetic ones.
+      // Moreover, one argument is a constant that can be viewed as a splat
+      // constant.
+      Value *Arg0 = Inst->getOperand(0);
+      bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
+                            isa<ConstantFP>(Arg0);
+      TargetTransformInfo::OperandValueKind Arg0OVK =
+          IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+                         : TargetTransformInfo::OK_AnyValue;
+      TargetTransformInfo::OperandValueKind Arg1OVK =
+          !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
+                          : TargetTransformInfo::OK_AnyValue;
+      ScalarCost += TTI.getArithmeticInstrCost(
+          Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+      VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+                                               Arg0OVK, Arg1OVK);
+    }
+    DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
+                 << ScalarCost << "\nVector: " << VectorCost << '\n');
+    return ScalarCost > VectorCost;
+  }
+
+  /// \brief Generate a constant vector with \p Val and the same
+  /// number of elements as the transition.
+  /// \p UseSplat defines whether or not \p Val should be replicated
+  /// across the whole vector.
+  /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
+  /// otherwise we generate a vector with as many undef as possible:
+  /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
+  /// used at the index of the extract.
+  Value *getConstantVector(Constant *Val, bool UseSplat) const {
+    unsigned ExtractIdx = UINT_MAX;
+    if (!UseSplat) {
+      // If we cannot determine where the constant must be, we have to
+      // use a splat constant.
+      Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
+      if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
+        ExtractIdx = CstVal->getSExtValue();
+      else
+        UseSplat = true;
+    }
+
+    unsigned End = getTransitionType()->getVectorNumElements();
+    if (UseSplat)
+      return ConstantVector::getSplat(End, Val);
+
+    SmallVector<Constant *, 4> ConstVec;
+    UndefValue *UndefVal = UndefValue::get(Val->getType());
+    for (unsigned Idx = 0; Idx != End; ++Idx) {
+      if (Idx == ExtractIdx)
+        ConstVec.push_back(Val);
+      else
+        ConstVec.push_back(UndefVal);
+    }
+    return ConstantVector::get(ConstVec);
+  }
+
+  /// \brief Check if promoting the operand at \p OperandIdx in \p Use
+  /// to a vector type can trigger undefined behavior.
+  static bool canCauseUndefinedBehavior(const Instruction *Use,
+                                        unsigned OperandIdx) {
+    // It is not safe to introduce undef when the operand is on
+    // the right-hand side of a division-like instruction.
+    if (OperandIdx != 1)
+      return false;
+    switch (Use->getOpcode()) {
+    default:
+      return false;
+    case Instruction::SDiv:
+    case Instruction::UDiv:
+    case Instruction::SRem:
+    case Instruction::URem:
+      return true;
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      return !Use->hasNoNaNs();
+    }
+    llvm_unreachable(nullptr);
+  }
+
+public:
+  VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI,
+                      Instruction *Transition, unsigned CombineCost)
+      : TLI(TLI), TTI(TTI), Transition(Transition),
+        StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
+    assert(Transition && "Do not know how to promote null");
+  }
+
+  /// \brief Check if we can promote \p ToBePromoted to a vector type.
+  bool canPromote(const Instruction *ToBePromoted) const {
+    // We could support CastInst too.
+    return isa<BinaryOperator>(ToBePromoted);
+  }
+
+  /// \brief Check if it is profitable to promote \p ToBePromoted
+  /// by moving the transition downward through it.
+  bool shouldPromote(const Instruction *ToBePromoted) const {
+    // Promote only if all the operands can be statically expanded.
+    // Indeed, we do not want to introduce any new kind of transitions.
+    for (const Use &U : ToBePromoted->operands()) {
+      const Value *Val = U.get();
+      if (Val == getEndOfTransition()) {
+        // If the use is a division and the transition is on the rhs,
+        // we cannot promote the operation, otherwise we may create a
+        // division by zero.
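+        // E.g. (an illustrative case): for 'udiv i32 %x, %ext', promoting
+        // the divisor means filling its remaining vector lanes, and an
+        // undef (or zero) lane there would be a division by zero.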
+        if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
+          return false;
+        continue;
+      }
+      if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
+          !isa<ConstantFP>(Val))
+        return false;
+    }
+    // Check that the resulting operation is legal.
+    int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
+    if (!ISDOpcode)
+      return false;
+    return StressStoreExtract ||
+           TLI.isOperationLegalOrCustom(
+               ISDOpcode, TLI.getValueType(getTransitionType(), true));
+  }
+
+  /// \brief Check whether or not \p Use can be combined
+  /// with the transition.
+  /// I.e., is it possible to do Use(Transition) => AnotherUse?
+  bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
+
+  /// \brief Record \p ToBePromoted as part of the chain to be promoted.
+  void enqueueForPromotion(Instruction *ToBePromoted) {
+    InstsToBePromoted.push_back(ToBePromoted);
+  }
+
+  /// \brief Set the instruction that will be combined with the transition.
+  void recordCombineInstruction(Instruction *ToBeCombined) {
+    assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
+    CombineInst = ToBeCombined;
+  }
+
+  /// \brief Promote all the instructions enqueued for promotion if it is
+  /// profitable.
+  /// \return True if the promotion happened, false otherwise.
+  bool promote() {
+    // Check if there is something to promote.
+    // Right now, if we do not have anything to combine with,
+    // we assume the promotion is not profitable.
+    if (InstsToBePromoted.empty() || !CombineInst)
+      return false;
+
+    // Check cost.
+    if (!StressStoreExtract && !isProfitableToPromote())
+      return false;
+
+    // Promote.
+    for (auto &ToBePromoted : InstsToBePromoted)
+      promoteImpl(ToBePromoted);
+    InstsToBePromoted.clear();
+    return true;
+  }
+};
+} // End of anonymous namespace.
+
+void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
+  // At this point, we know that all the operands of ToBePromoted but Def
+  // can be statically promoted.
+  // For Def, we need to use its parameter in ToBePromoted:
+  // b = ToBePromoted ty1 a
+  // Def = Transition ty1 b to ty2
+  // Move the transition down.
+  // 1. Replace all uses of the promoted operation by the transition.
+  // = ... b => = ... Def.
+  assert(ToBePromoted->getType() == Transition->getType() &&
+         "The type of the result of the transition does not match "
+         "the final type");
+  ToBePromoted->replaceAllUsesWith(Transition);
+  // 2. Update the type of the uses.
+  // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
+  Type *TransitionTy = getTransitionType();
+  ToBePromoted->mutateType(TransitionTy);
+  // 3. Update all the operands of the promoted operation with promoted
+  // operands.
+  // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
+  for (Use &U : ToBePromoted->operands()) {
+    Value *Val = U.get();
+    Value *NewVal = nullptr;
+    if (Val == Transition)
+      NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
+    else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
+             isa<ConstantFP>(Val)) {
+      // Use a splat constant if it is not safe to use undef.
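+      // E.g. (an illustrative case): with a <2 x i32> transition at index 0
+      // and the constant 7, an unsafe position yields <7, 7>, while a safe
+      // one yields <7, undef>.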
+      NewVal = getConstantVector(
+          cast<Constant>(Val),
+          isa<UndefValue>(Val) ||
+              canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
+    } else
+      llvm_unreachable("Did you modify shouldPromote and forget to update "
+                       "this?");
+    ToBePromoted->setOperand(U.getOperandNo(), NewVal);
+  }
+  Transition->removeFromParent();
+  Transition->insertAfter(ToBePromoted);
+  Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
+}
+
+// See if we can speculate calls to intrinsic cttz/ctlz.
+//
+// Example:
+// entry:
+//   ...
+//   %cmp = icmp eq i64 %val, 0
+//   br i1 %cmp, label %end.bb, label %then.bb
+//
+// then.bb:
+//   %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true)
+//   br label %EndBB
+//
+// end.bb:
+//   %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ]
+//
+// ==>
+//
+// entry:
+//   ...
+//   %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false)
+//
+static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
+  assert(BrInst->isConditional() && "Expected a conditional branch!");
+  BasicBlock *ThenBB = BrInst->getSuccessor(1);
+  BasicBlock *EndBB = BrInst->getSuccessor(0);
+
+  // See if ThenBB contains only one instruction (excluding the
+  // terminator and DbgInfoIntrinsic calls).
+  IntrinsicInst *II = nullptr;
+  CastInst *CI = nullptr;
+  for (BasicBlock::iterator I = ThenBB->begin(),
+                            E = std::prev(ThenBB->end()); I != E; ++I) {
+    // Skip debug info.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+
+    // Check if this is a zero extension or a truncate of a previously
+    // matched call to intrinsic cttz/ctlz.
+    if (II) {
+      // Early exit if we already found a "free" zero extend/truncate.
+      if (CI)
+        return false;
+
+      Type *SrcTy = II->getType();
+      Type *DestTy = I->getType();
+      Value *V;
+
+      if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
+        // Speculate this zero extend only if it is "free" for the target.
+        if (TLI.isZExtFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
+        // Speculate this truncate only if it is "free" for the target.
+        if (TLI.isTruncateFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else {
+        // Avoid speculating more than one instruction.
+        return false;
+      }
+    }
+
+    // See if this is a call to intrinsic cttz/ctlz.
+    if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
+      // Avoid speculating expensive intrinsic calls.
+      if (!TLI.isCheapToSpeculateCttz())
+        return false;
+    }
+    else if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::ctlz>())) {
+      // Avoid speculating expensive intrinsic calls.
+      if (!TLI.isCheapToSpeculateCtlz())
+        return false;
+    } else
+      return false;
+
+    II = cast<IntrinsicInst>(I);
+  }
+
+  // Look for PHI nodes with 'II' as the incoming value from 'ThenBB'.
+  BasicBlock *EntryBB = BrInst->getParent();
+  for (BasicBlock::iterator I = EndBB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+    Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
+    Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
+
+    if (!OrigV)
+      return false;
+
+    if (ThenV != II && (!CI || ThenV != CI))
+      return false;
+
+    if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
+      unsigned BitWidth = II->getType()->getIntegerBitWidth();
+
+      // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
+      // intrinsic call but 'OrigV' is not equal to the bit width of the
+      // cttz/ctlz input value.
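+      // E.g. (an illustrative case): for an i64 cttz the incoming constant
+      // from the entry block must be 64, which is what cttz produces when
+      // its input is zero; any other constant makes hoisting unsound.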
+      if (CInt->getValue() != BitWidth)
+        return false;
+
+      // Hoist the call to cttz/ctlz from ThenBB into EntryBB.
+      EntryBB->getInstList().splice(BrInst, ThenBB->getInstList(),
+                                    ThenBB->begin(), std::prev(ThenBB->end()));
+
+      // Update PN setting ThenV as the incoming value from both 'EntryBB'
+      // and 'ThenBB'. Eventually, method 'OptimizeInst' will fold this
+      // phi node if all the incoming values are the same.
+      PN->setIncomingValue(PN->getBasicBlockIndex(EntryBB), ThenV);
+      PN->setIncomingValue(PN->getBasicBlockIndex(ThenBB), ThenV);
+
+      // Clear the 'undef on zero' flag of the cttz/ctlz intrinsic call.
+      if (cast<ConstantInt>(II->getArgOperand(1))->isOne()) {
+        Type *Ty = II->getArgOperand(0)->getType();
+        Value *Args[] = { II->getArgOperand(0),
+                          ConstantInt::getFalse(II->getContext()) };
+        Module *M = EntryBB->getParent()->getParent();
+        Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
+        IRBuilder<> Builder(II);
+        Instruction *NewI = Builder.CreateCall(IF, Args);
+
+        // Replace the old call to cttz/ctlz.
+        II->replaceAllUsesWith(NewI);
+        II->eraseFromParent();
+      }
+
+      // Update BrInst condition so that the branch to EndBB is always taken.
+      // Later on, method 'ConstantFoldTerminator' will simplify this branch
+      // replacing it with a direct branch to 'EndBB'.
+      // As a side effect, CodeGenPrepare will attempt to simplify the control
+      // flow graph by deleting basic block 'ThenBB' and merging 'EntryBB' into
+      // 'EndBB' (calling method 'EliminateFallThrough').
+      BrInst->setCondition(ConstantInt::getTrue(BrInst->getContext()));
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// Some targets can do store(extractelement) with one instruction.
+/// Try to push the extractelement towards the stores when the target
+/// has this feature and this is profitable.
+bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) {
+  unsigned CombineCost = UINT_MAX;
+  if (DisableStoreExtract || !TLI ||
+      (!StressStoreExtract &&
+       !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
+                                       Inst->getOperand(1), CombineCost)))
+    return false;
+
+  // At this point we know that Inst is a vector-to-scalar transition.
+  // Try to move it down the def-use chain, until:
+  // - We can combine the transition with its single use
+  //   => we got rid of the transition.
+  // - We escape the current basic block
+  //   => we would need to check that we are moving it to a cheaper place and
+  //      we do not do that for now.
+  BasicBlock *Parent = Inst->getParent();
+  DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
+  VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost);
+  // If the transition has more than one use, assume this is not going to be
+  // beneficial.
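+  // A typical chain this loop walks (illustrative):
+  //   %e = extractelement <2 x i32> %v, i32 0   ; the transition
+  //   %a = add i32 %e, 4                        ; enqueued for promotion
+  //   store i32 %a, i32* %p                     ; combined with the extract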
+  while (Inst->hasOneUse()) {
+    Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
+    DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
+
+    if (ToBePromoted->getParent() != Parent) {
+      DEBUG(dbgs() << "Instruction to promote is in a different block ("
+                   << ToBePromoted->getParent()->getName()
+                   << ") than the transition (" << Parent->getName() << ").\n");
+      return false;
+    }
+
+    if (VPH.canCombine(ToBePromoted)) {
+      DEBUG(dbgs() << "Assume " << *Inst << '\n'
+                   << "will be combined with: " << *ToBePromoted << '\n');
+      VPH.recordCombineInstruction(ToBePromoted);
+      bool Changed = VPH.promote();
+      NumStoreExtractExposed += Changed;
+      return Changed;
+    }
+
+    DEBUG(dbgs() << "Try promoting.\n");
+    if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
+      return false;
+
+    DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
+
+    VPH.enqueueForPromotion(ToBePromoted);
+    Inst = ToBePromoted;
+  }
+  return false;
+}
+
+bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
   if (PHINode *P = dyn_cast<PHINode>(I)) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
@@ -3188,14 +4245,14 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
       GEPI->replaceAllUsesWith(NC);
       GEPI->eraseFromParent();
       ++NumGEPsElim;
-      OptimizeInst(NC);
+      OptimizeInst(NC, ModifiedDT);
       return true;
     }
     return false;
   }
 
   if (CallInst *CI = dyn_cast<CallInst>(I))
-    return OptimizeCallInst(CI);
+    return OptimizeCallInst(CI, ModifiedDT);
 
   if (SelectInst *SI = dyn_cast<SelectInst>(I))
     return OptimizeSelectInst(SI);
@@ -3203,20 +4260,53 @@
   if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
     return OptimizeShuffleVectorInst(SVI);
 
+  if (isa<ExtractElementInst>(I))
+    return OptimizeExtractElementInst(I);
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+    if (TLI && BI->isConditional() && BI->getCondition()->hasOneUse()) {
+      // Check if the branch condition compares a value against zero.
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+        if (ICI->getPredicate() == ICmpInst::ICMP_EQ &&
+            match(ICI->getOperand(1), m_Zero())) {
+          BasicBlock *ThenBB = BI->getSuccessor(1);
+          BasicBlock *EndBB = BI->getSuccessor(0);
+
+          // Check if ThenBB is only reachable from this basic block; also,
+          // check if EndBB has more than one predecessor.
+          if (ThenBB->getSinglePredecessor() &&
+              !EndBB->getSinglePredecessor()) {
+            TerminatorInst *TI = ThenBB->getTerminator();
+
+            if (TI->getNumSuccessors() == 1 && TI->getSuccessor(0) == EndBB &&
+                // Try to speculate calls to intrinsic cttz/ctlz from 'ThenBB'.
+                OptimizeBranchInst(BI, *TLI)) {
+              ModifiedDT = true;
+              return true;
+            }
+          }
+        }
+      }
+    }
+    return false;
+  }
+
   return false;
 }
 
 // In this pass we look for GEP and cast instructions that are used
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
+bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;
 
   CurInstIterator = BB.begin();
-  while (CurInstIterator != BB.end())
-    MadeChange |= OptimizeInst(CurInstIterator++);
-
+  while (CurInstIterator != BB.end()) {
+    MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+    if (ModifiedDT)
+      return true;
+  }
   MadeChange |= DupRetToEnableTailCallOpts(&BB);
 
   return MadeChange;
@@ -3227,10 +4317,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
 // find a node corresponding to the value.
 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
   bool MadeChange = false;
-  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+  for (BasicBlock &BB : F) {
     Instruction *PrevNonDbgInst = nullptr;
-    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
-      Instruction *Insn = BI; ++BI;
+    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
+      Instruction *Insn = BI++;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
       // Leave dbg.values that refer to an alloca alone. These
       // intrinsics describe the address of a variable (= the alloca)
@@ -3324,3 +4414,233 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
   }
   return MadeChange;
 }
+
+/// \brief Retrieve the probabilities of a conditional branch. Returns true on
+/// success, or returns false if no or invalid metadata was found.
+static bool extractBranchMetadata(BranchInst *BI,
+                                  uint64_t &ProbTrue, uint64_t &ProbFalse) {
+  assert(BI->isConditional() &&
+         "Looking for probabilities on unconditional branch?");
+  auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+  if (!ProfileData || ProfileData->getNumOperands() != 3)
+    return false;
+
+  const auto *CITrue =
+      mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1));
+  const auto *CIFalse =
+      mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2));
+  if (!CITrue || !CIFalse)
+    return false;
+
+  ProbTrue = CITrue->getValue().getZExtValue();
+  ProbFalse = CIFalse->getValue().getZExtValue();
+
+  return true;
+}
+
+/// \brief Scale down both weights to fit into uint32_t.
+static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
+  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
+  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
+  NewTrue = NewTrue / Scale;
+  NewFalse = NewFalse / Scale;
+}
+
+/// \brief Some targets prefer to split a conditional branch like:
+/// \code
+///   %0 = icmp ne i32 %a, 0
+///   %1 = icmp ne i32 %b, 0
+///   %or.cond = or i1 %0, %1
+///   br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// into multiple branch instructions like:
+/// \code
+///   bb1:
+///     %0 = icmp ne i32 %a, 0
+///     br i1 %0, label %TrueBB, label %bb2
+///   bb2:
+///     %1 = icmp ne i32 %b, 0
+///     br i1 %1, label %TrueBB, label %FalseBB
+/// \endcode
+/// This usually allows instruction selection to do even further optimizations
+/// and combine the compare with the branch instruction. Currently this is
+/// applied for targets which have "cheap" jump instructions.
+///
+/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
+///
+bool CodeGenPrepare::splitBranchCondition(Function &F) {
+  if (!TM || TM->Options.EnableFastISel != true ||
+      !TLI || TLI->isJumpExpensive())
+    return false;
+
+  bool MadeChange = false;
+  for (auto &BB : F) {
+    // Does this BB end with the following?
+    //   %cond1 = icmp|fcmp|binary instruction ...
+    //   %cond2 = icmp|fcmp|binary instruction ...
+    //   %cond.or = or|and i1 %cond1, %cond2
+    //   br i1 %cond.or label %dest1, label %dest2
+    BinaryOperator *LogicOp;
+    BasicBlock *TBB, *FBB;
+    if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+      continue;
+
+    unsigned Opc;
+    Value *Cond1, *Cond2;
+    if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
+                             m_OneUse(m_Value(Cond2)))))
+      Opc = Instruction::And;
+    else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
+                                 m_OneUse(m_Value(Cond2)))))
+      Opc = Instruction::Or;
+    else
+      continue;
+
+    if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
+        !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())))
+      continue;
+
+    DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
+
+    // Create a new BB.
+    auto *InsertBefore = std::next(Function::iterator(BB))
+        .getNodePtrUnchecked();
+    auto TmpBB = BasicBlock::Create(BB.getContext(),
+                                    BB.getName() + ".cond.split",
+                                    BB.getParent(), InsertBefore);
+
+    // Update the original basic block to use the first condition directly in
+    // the branch instruction, removing the no longer needed and/or instruction.
+    auto *Br1 = cast<BranchInst>(BB.getTerminator());
+    Br1->setCondition(Cond1);
+    LogicOp->eraseFromParent();
+
+    // Depending on the condition we have to either replace the true or the
+    // false successor of the original branch instruction.
+    if (Opc == Instruction::And)
+      Br1->setSuccessor(0, TmpBB);
+    else
+      Br1->setSuccessor(1, TmpBB);
+
+    // Fill in the new basic block.
+    auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+    if (auto *I = dyn_cast<Instruction>(Cond2)) {
+      I->removeFromParent();
+      I->insertBefore(Br2);
+    }
+
+    // Update PHI nodes in both successors. The original BB needs to be
+    // replaced in one successor's PHI nodes, because the branch comes now from
+    // the newly generated BB (TmpBB). In the other successor we need to add one
+    // incoming edge to the PHI nodes, because both branch instructions target
+    // now the same successor. Depending on the original branch condition
+    // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
+    // we perform the correct update for the PHI nodes.
+    // This doesn't change the successor order of the just created branch
+    // instruction (or any other instruction).
+    if (Opc == Instruction::Or)
+      std::swap(TBB, FBB);
+
+    // Replace the old BB with the new BB.
+    for (auto &I : *TBB) {
+      PHINode *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+        break;
+      int i;
+      while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
+        PN->setIncomingBlock(i, TmpBB);
+    }
+
+    // Add another incoming edge from the new BB.
+    for (auto &I : *FBB) {
+      PHINode *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+        break;
+      auto *Val = PN->getIncomingValueForBlock(&BB);
+      PN->addIncoming(Val, TmpBB);
+    }
+
+    // Update the branch weights (from SelectionDAGBuilder::
+    // FindMergedConditions).
+    if (Opc == Instruction::Or) {
+      // Codegen X | Y as:
+      // BB1:
+      //   jmp_if_X TBB
+      //   jmp TmpBB
+      // TmpBB:
+      //   jmp_if_Y TBB
+      //   jmp FBB
+      //
+
+      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+      // The requirement is that
+      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+      //     = TrueProb for original BB.
+      // Assuming the original weights are A and B, one choice is to set BB1's
+      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
+      // assumes that
+      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+      // Another choice is to assume TrueProb for BB1 equals TrueProb for
+      // TmpBB, but the math is more complicated.
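+      // Worked example (illustrative): with original weights A = 1 and
+      // B = 3, BB1 gets weights (1, 1 + 2*3) = (1, 7) and TmpBB gets
+      // (1, 2*3) = (1, 6) before scaling.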
+      uint64_t TrueWeight, FalseWeight;
+      if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+        uint64_t NewTrueWeight = TrueWeight;
+        uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+        NewTrueWeight = TrueWeight;
+        NewFalseWeight = 2 * FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+      }
+    } else {
+      // Codegen X & Y as:
+      // BB1:
+      //   jmp_if_X TmpBB
+      //   jmp FBB
+      // TmpBB:
+      //   jmp_if_Y TBB
+      //   jmp FBB
+      //
+      //  This requires creation of TmpBB after CurBB.
+
+      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+      // The requirement is that
+      //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+      //     = FalseProb for original BB.
+      // Assuming the original weights are A and B, one choice is to set BB1's
+      // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
+      // assumes that
+      //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+      uint64_t TrueWeight, FalseWeight;
+      if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+        uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+        uint64_t NewFalseWeight = FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+        NewTrueWeight = 2 * TrueWeight;
+        NewFalseWeight = FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+      }
+    }
+
+    // Request DOM Tree update.
+    // Note: No point in getting fancy here, since the DT info is never
+    // available to CodeGenPrepare and the existing update code is broken
+    // anyway.
+ ModifiedDT = true; + + MadeChange = true; + + DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump(); + TmpBB->dump()); + } + return MadeChange; +} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index d3ffcc78471b..3d62d4887602 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -20,24 +20,20 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "post-RA-sched" -CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : - AntiDepBreaker(), MF(MFi), - MRI(MF.getRegInfo()), - TII(MF.getTarget().getInstrInfo()), - TRI(MF.getTarget().getRegisterInfo()), - RegClassInfo(RCI), - Classes(TRI->getNumRegs(), nullptr), - KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0), - KeepRegs(TRI->getNumRegs(), false) {} +CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, + const RegisterClassInfo &RCI) + : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getSubtarget().getInstrInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), + Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -94,7 +90,14 @@ void CriticalAntiDepBreaker::FinishBlock() { void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) { - if (MI->isDebugValue()) + // Kill instructions can define registers but are really nops, and there might + // be a real definition earlier that needs to be paired with uses dominated by + // this kill. + + // FIXME: It may be possible to remove the isKill() restriction once PR18663 + // has been properly fixed. There can be value in processing kills as seen in + // the AggressiveAntiDepBreaker class. + if (MI->isDebugValue() || MI->isKill()) return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); @@ -237,6 +240,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Update liveness. // Proceeding upwards, registers that are defed but not used in this // instruction are now dead. + assert(!MI->isKill() && "Attempting to scan a kill instruction"); if (!TII->isPredicated(MI)) { // Predicated defs are modeled as read + write, i.e. similar to two @@ -265,19 +269,10 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - // FIXME: we should use a SubRegIterator that includes self (as above), so - // we don't have to repeat all this code for the reg itself. - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.reset(Reg); - Classes[Reg] = nullptr; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - unsigned SubregReg = *SubRegs; + // For the reg itself and all subregs: update the def to current; + // reset the kill state, any restrictions, and references. 
+ for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) { + unsigned SubregReg = *SRI; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; KeepRegs.reset(SubregReg); @@ -309,19 +304,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.insert(std::make_pair(Reg, &MO)); - // FIXME: we should use an MCRegAliasIterator that includes self so we don't - // have to repeat all this code for the reg itself. - // It wasn't previously live but now it is, this is a kill. - if (KillIndices[Reg] == ~0u) { - KillIndices[Reg] = Count; - DefIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - } - // Repeat, for all aliases. - for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + // Repeat for all aliases. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; @@ -527,7 +512,14 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; - if (MI->isDebugValue()) + // Kill instructions can define registers but are really nops, and there + // might be a real definition earlier that needs to be paired with uses + // dominated by this kill. + + // FIXME: It may be possible to remove the isKill() restriction once PR18663 + // has been properly fixed. There can be value in processing kills as seen + // in the AggressiveAntiDepBreaker class. + if (MI->isDebugValue() || MI->isKill()) continue; // Check if this instruction has a dependence on the critical path that diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 45e4ff5c78b9..ceef74d1a440 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H -#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H +#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" @@ -38,32 +38,32 @@ class TargetRegisterInfo; const TargetRegisterInfo *TRI; const RegisterClassInfo &RegClassInfo; - /// AllocatableSet - The set of allocatable registers. + /// The set of allocatable registers. /// We'll be ignoring anti-dependencies on non-allocatable registers, /// because they may not be safe to break. const BitVector AllocatableSet; - /// Classes - For live regs that are only used in one register class in a + /// For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in /// multiple register classes, the corresponding value is -1 casted to a /// pointer. std::vector<const TargetRegisterClass*> Classes; - /// RegRefs - Map registers to all their references within a live range. + /// Map registers to all their references within a live range. std::multimap<unsigned, MachineOperand *> RegRefs; typedef std::multimap<unsigned, MachineOperand *>::const_iterator RegRefIter; - /// KillIndices - The index of the most recent kill (proceeding bottom-up), + /// The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. 
   std::vector<unsigned> KillIndices;
 
-  /// DefIndices - The index of the most recent complete def (proceeding
+  /// The index of the most recent complete def (proceeding
   /// bottom up), or ~0u if the register is live.
   std::vector<unsigned> DefIndices;
 
-  /// KeepRegs - A set of registers which are live and cannot be changed to
+  /// A set of registers which are live and cannot be changed to
   /// break anti-dependencies.
   BitVector KeepRegs;
 
@@ -71,26 +71,23 @@ class TargetRegisterInfo;
     CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
     ~CriticalAntiDepBreaker();
 
-    /// Start - Initialize anti-dep breaking for a new basic block.
+    /// Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB) override;
 
-    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
-    /// path
+    /// Identify anti-dependencies along the critical path
     /// of the ScheduleDAG and break them by renaming registers.
-    ///
    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
                                    MachineBasicBlock::iterator Begin,
                                    MachineBasicBlock::iterator End,
                                    unsigned InsertPosIndex,
                                    DbgValueVector &DbgValues) override;
 
-    /// Observe - Update liveness information to account for the current
+    /// Update liveness information to account for the current
     /// instruction, which will not be scheduled.
-    ///
     void Observe(MachineInstr *MI, unsigned Count,
                  unsigned InsertPosIndex) override;
 
-    /// Finish - Finish anti-dep breaking for a basic block.
+    /// Finish anti-dep breaking for a basic block.
     void FinishBlock() override;
 
   private:
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index bc6e9dc43e13..0a188c0935ad 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -106,16 +106,15 @@ namespace llvm {
 class DefaultVLIWScheduler : public ScheduleDAGInstrs {
 public:
   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
-                       MachineDominatorTree &MDT, bool IsPostRA);
+                       bool IsPostRA);
   // Schedule - Actual scheduling work.
void schedule() override; }; } -DefaultVLIWScheduler::DefaultVLIWScheduler( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - bool IsPostRA) : - ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { +DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, + MachineLoopInfo &MLI, bool IsPostRA) + : ScheduleDAGInstrs(MF, &MLI, IsPostRA) { CanHandleTerminators = true; } @@ -125,12 +124,12 @@ void DefaultVLIWScheduler::schedule() { } // VLIWPacketizerList Ctor -VLIWPacketizerList::VLIWPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - bool IsPostRA) : TM(MF.getTarget()), MF(MF) { - TII = TM.getInstrInfo(); - ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr); - VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); +VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, bool IsPostRA) + : MF(MF) { + TII = MF.getSubtarget().getInstrInfo(); + ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA); } // VLIWPacketizerList Dtor diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 2b144d89e651..c17a35d1c734 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -19,7 +19,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "codegen-dce" @@ -58,6 +59,10 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MI->isInlineAsm()) return false; + // Don't delete frame allocation labels. + if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) + return false; + // Don't delete instructions with side effects. bool SawStore = false; if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) @@ -90,8 +95,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { bool AnyChanges = false; MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); // Loop over all instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -122,19 +127,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); // It is possible that some DBG_VALUE instructions refer to this - // instruction. Examine each def operand for such references; - // if found, mark the DBG_VALUE as undef (but don't delete it). - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - MRI->markUsesInDebugValueAsUndef(Reg); - } + // instruction. They get marked as undef and will be deleted + // in the live debug variable analysis. 
+ MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; - MI->eraseFromParent(); ++NumDeletes; MIE = MBB->rend(); // MII is now pointing to the next instruction to process, diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index a195586cf624..75b74d9a6c33 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -50,6 +51,11 @@ namespace { bool runOnFunction(Function &Fn) override; + bool doFinalization(Module &M) override { + RewindFunction = nullptr; + return false; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { } const char *getPassName() const override { @@ -118,7 +124,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index c4706328ea52..995606f65d77 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -153,8 +153,8 @@ private: public: /// runOnMachineFunction - Initialize per-function data structures. void runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LiveRegUnits.clear(); LiveRegUnits.setUniverse(TRI->getNumRegUnits()); @@ -245,7 +245,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI || DefMI->getParent() != Head) continue; - if (InsertAfter.insert(DefMI)) + if (InsertAfter.insert(DefMI).second) DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); if (DefMI->isTerminator()) { DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); @@ -580,7 +580,7 @@ namespace { class EarlyIfConverter : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const MCSchedModel *SchedModel; + MCSchedModel SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; @@ -688,7 +688,7 @@ bool EarlyIfConverter::shouldConvertIf() { FBBTrace.getCriticalPath()); // Set a somewhat arbitrary limit on the critical path extension we accept. - unsigned CritLimit = SchedModel->MispredictPenalty/2; + unsigned CritLimit = SchedModel.MispredictPenalty/2; // If-conversion only makes sense when there is unexploited ILP. Compute the // maximum-ILP resource length of the trace after if-conversion. 
  // Compare it
@@ -782,8 +782,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
       .enableEarlyIfConversion())
     return false;
 
-  TII = MF.getTarget().getInstrInfo();
-  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getSubtarget().getInstrInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
   SchedModel =
       MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
   MRI = &MF.getRegInfo();
diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp
index e976d7fd9d82..85b089343ca1 100644
--- a/lib/CodeGen/ErlangGC.cpp
+++ b/lib/CodeGen/ErlangGC.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
@@ -53,7 +54,7 @@ ErlangGC::ErlangGC() {
 MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 DebugLoc DL) const {
-  const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo();
+  const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
   MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
   BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
   return Label;
 }
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index cf55b68b16f8..b3a22c83a808 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -29,7 +30,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "execution-fix"
@@ -73,6 +75,9 @@ struct DomainValue {
 
   // Is domain available?
   bool hasDomain(unsigned domain) const {
+    assert(domain <
+               static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
+           "undefined behavior");
     return AvailableDomains & (1u << domain);
   }
 
@@ -132,7 +137,7 @@ class ExeDepsFix : public MachineFunctionPass {
   MachineFunction *MF;
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
-  std::vector<int> AliasMap;
+  std::vector<SmallVector<int, 1>> AliasMap;
   const unsigned NumRegs;
   LiveReg *LiveRegs;
   typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
@@ -168,8 +173,8 @@ public:
   }
 
 private:
-  // Register mapping.
-  int regIndex(unsigned Reg);
+  iterator_range<SmallVectorImpl<int>::const_iterator>
+  regIndizes(unsigned Reg) const;
 
   // DomainValue allocation.
   DomainValue *alloc(int domain = -1);
@@ -200,11 +205,13 @@ private:
 
 char ExeDepsFix::ID = 0;
 
-/// Translate TRI register number to an index into our smaller tables of
-/// interesting registers. Return -1 for boring registers.
-int ExeDepsFix::regIndex(unsigned Reg) {
+/// Translate TRI register number to a list of indices into our smaller tables
+/// of interesting registers.
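+/// E.g. (illustrative): a register that aliases several members of the
+/// interesting class maps to each of their indices, so a write to it
+/// updates every affected LiveRegs entry.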
+iterator_range<SmallVectorImpl<int>::const_iterator> +ExeDepsFix::regIndizes(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); - return AliasMap[Reg]; + const auto &Entry = AliasMap[Reg]; + return make_range(Entry.begin(), Entry.end()); } DomainValue *ExeDepsFix::alloc(int domain) { @@ -337,9 +344,11 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { // All uses of B are referred to A. B->Next = retain(A); - for (unsigned rx = 0; rx != NumRegs; ++rx) + for (unsigned rx = 0; rx != NumRegs; ++rx) { + assert(LiveRegs && "no space allocated for live registers"); if (LiveRegs[rx].Value == B) setLiveReg(rx, A); + } return true; } @@ -369,13 +378,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { if (MBB->pred_empty()) { for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = regIndex(*i); - if (rx < 0) - continue; - // Treat function live-ins as if they were defined just before the first - // instruction. Usually, function arguments are set up immediately - // before the call. - LiveRegs[rx].Def = -1; + for (int rx : regIndizes(*i)) { + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } } DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); return; @@ -466,26 +474,26 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { /// or undef use. bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { - int rx = regIndex(MI->getOperand(OpIdx).getReg()); - if (rx < 0) - return false; - - unsigned Clearance = CurInstr - LiveRegs[rx].Def; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + unsigned reg = MI->getOperand(OpIdx).getReg(); + for (int rx : regIndizes(reg)) { + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - return true; - } - // The current clearance seems OK, but we may be ignoring a def from a - // back-edge. - if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { - DEBUG(dbgs() << ": OK .\n"); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + continue; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK .\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); return false; } - // A def from an unprocessed back-edge may make us break this dependency. - DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); - return false; + return true; } // Update def-ages for registers defined by MI. @@ -513,26 +521,24 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { break; if (MO.isUse()) continue; - int rx = regIndex(MO.getReg()); - if (rx < 0) - continue; - - // This instruction explicitly defines rx. - DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr - << '\t' << *MI); - - // Check clearance before partial register updates. - // Call breakDependence before setting LiveRegs[rx].Def. 
- unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); - if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(MI, i, TRI); - - // How many instructions since rx was last written? - LiveRegs[rx].Def = CurInstr; - - // Kill off domains redefined by generic instructions. - if (Kill) - kill(rx); + for (int rx : regIndizes(MO.getReg())) { + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + + // How many instructions since rx was last written? + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } } ++CurInstr; } @@ -581,19 +587,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - force(rx, domain); + for (int rx : regIndizes(mo.getReg())) { + force(rx, domain); + } } // Kill all defs and force them. for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - kill(rx); - force(rx, domain); + for (int rx : regIndizes(mo.getReg())) { + kill(rx); + force(rx, domain); + } } } @@ -610,9 +616,10 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx].Value) { + for (int rx : regIndizes(mo.getReg())) { + DomainValue *dv = LiveRegs[rx].Value; + if (dv == nullptr) + continue; // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -644,6 +651,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { SmallVector<LiveReg, 4> Regs; for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; + assert(LiveRegs && "no space allocated for live registers"); const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. if (!LR.Value->getCommonDomains(available)) { @@ -683,9 +691,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { continue; // If latest didn't merge, it is useless now. Kill all registers using it. - for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) - if (LiveRegs[*i].Value == Latest) - kill(*i); + for (int i : used) { + assert(LiveRegs && "no space allocated for live registers"); + if (LiveRegs[i].Value == Latest) + kill(i); + } } // dv is the DomainValue we are going to use for this instruction. 
@@ -702,24 +712,24 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { ii != ee; ++ii) { MachineOperand &mo = *ii; if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { - kill(rx); - setLiveReg(rx, dv); + for (int rx : regIndizes(mo.getReg())) { + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); + } } } } bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = MF->getTarget().getInstrInfo(); - TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " - << RC->getName() << " **********\n"); + << TRI->getRegClassName(RC) << " **********\n"); // If no relevant registers are used in the function, we can skip it // completely. @@ -734,13 +744,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // Initialize the AliasMap on the first use. if (AliasMap.empty()) { - // Given a PhysReg, AliasMap[PhysReg] is either the relevant index into RC, - // or -1. - AliasMap.resize(TRI->getNumRegs(), -1); + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. + AliasMap.resize(TRI->getNumRegs()); for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); AI.isValid(); ++AI) - AliasMap[*AI] = i; + AliasMap[*AI].push_back(i); } MachineBasicBlock *Entry = MF->begin(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 90b62b5661e1..55e809e24278 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "expand-isel-pseudos" @@ -46,7 +46,7 @@ INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Iterate through each instruction in the function, looking for pseudos. 
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 8969bccd4603..e7bf143b82ee 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -20,8 +20,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "postrapseudos" @@ -182,8 +183,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" << "********** Function: " << MF.getName() << '\n'); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); bool MadeChange = false; diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp new file mode 100644 index 000000000000..63c3699e13b6 --- /dev/null +++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp @@ -0,0 +1,374 @@ +//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief A pass that instruments code with fast checks for indirect calls and +/// hooks for a function to check violations. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cfi" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" +#include "llvm/CodeGen/JumpInstrTables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCFIIndirectCalls, + "Number of indirect call sites rewritten by the CFI pass"); + +char ForwardControlFlowIntegrity::ID = 0; +INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", + "Control-Flow Integrity", true, true) +INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); +INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); +INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", + "Control-Flow Integrity", true, true) + +ModulePass *llvm::createForwardControlFlowIntegrityPass() { + return new ForwardControlFlowIntegrity(); +} + +ModulePass *llvm::createForwardControlFlowIntegrityPass( + JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, + StringRef CFIFuncName) { + return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, + CFIFuncName); +} + +// Checks to see if a given CallSite is making an indirect call, 
including +// cases where the indirect call is made through a bitcast. +static bool isIndirectCall(CallSite &CS) { + if (CS.getCalledFunction()) + return false; + + // Check the value to see if it is merely a bitcast of a function. In + // this case, it will translate to a direct function call in the resulting + // assembly, so we won't treat it as an indirect call here. + const Value *V = CS.getCalledValue(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + return !(CE->isCast() && isa<Function>(CE->getOperand(0))); + } + + // Otherwise, since we know it's a call, it must be an indirect call + return true; +} + +static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; + +ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() + : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), + CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { + initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); +} + +ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( + JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, + std::string CFIFuncName) + : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), + CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { + initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); +} + +ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} + +void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<JumpInstrTableInfo>(); + AU.addRequired<JumpInstrTables>(); +} + +void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { + // To get the indirect calls, we iterate over all functions and iterate over + // the list of basic blocks in each. We extract a total list of indirect calls + // before modifying any of them, since our modifications will modify the list + // of basic blocks. + for (Function &F : M) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + CallSite CS(&I); + if (!(CS && isIndirectCall(CS))) + continue; + + Value *CalledValue = CS.getCalledValue(); + + // Don't rewrite this instruction if the indirect call is actually just + // inline assembly, since our transformation will generate an invalid + // module in that case. + if (isa<InlineAsm>(CalledValue)) + continue; + + IndirectCalls.push_back(&I); + } + } + } +} + +void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, + CFITables &CFIT) { + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + for (Instruction *I : IndirectCalls) { + CallSite CS(I); + Value *CalledValue = CS.getCalledValue(); + + // Get the function type for this call and look it up in the tables. + Type *VTy = CalledValue->getType(); + PointerType *PTy = dyn_cast<PointerType>(VTy); + Type *EltTy = PTy->getElementType(); + FunctionType *FunTy = dyn_cast<FunctionType>(EltTy); + FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); + ++NumCFIIndirectCalls; + Constant *JumpTableStart = nullptr; + Constant *JumpTableMask = nullptr; + Constant *JumpTableSize = nullptr; + + // Some call sites have function types that don't correspond to any + // address-taken function in the module. This happens when function pointers + // are passed in from external code. + auto it = CFIT.find(TransformedTy); + if (it == CFIT.end()) { + // In this case, make sure that the function pointer will change by + // setting the mask and the start to be 0 so that the transformed + // function is 0. 
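Why zeroing both constants works, and what the rotate-based variant emitted further down by rewriteFunctionPointer buys, is easiest to see with plain integers standing in for code pointers. A minimal standalone sketch with invented constants (this is an illustration, not code from this patch):

#include <cassert>
#include <cstdint>

// Sub flavor: clamp Ptr into the table by subtracting the table start,
// masking the difference down to an in-table offset, and adding it back.
static uint64_t clampToTable(uint64_t Ptr, uint64_t Start, uint64_t Mask) {
  return ((Ptr - Start) & Mask) + Start;
}

// Ror flavor: rotr(Ptr - Start, LogAlign) < Size holds exactly for pointers
// that are aligned table entries, since misalignment bits rotate to the top.
static uint64_t rotr64(uint64_t V, unsigned N) {
  return (V >> N) | (V << (64 - N)); // N must be in (0, 64)
}

int main() {
  // An invented 8-entry table of 8-byte-aligned stubs based at 0x1000; the
  // mask is built as in runOnModule(): ((TableSize << LogAlign) - 1) & -Align.
  const uint64_t Start = 0x1000, Mask = ((8 << 3) - 1) & -8;
  assert(clampToTable(0x1010, Start, Mask) == 0x1010);   // valid entry: kept
  assert(clampToTable(0xdead00, Start, Mask) == 0x1000); // rogue: forced back
  // Start == Mask == 0 (no table for this type) maps every pointer to 0, so
  // the non-enforcing equality check always detects a change.
  assert(clampToTable(0xdead00, 0, 0) == 0);

  assert(rotr64(0x1018 - Start, 3) < 8);    // entry 3: in range
  assert(!(rotr64(0x1040 - Start, 3) < 8)); // one past the end
  assert(!(rotr64(0x1001 - Start, 3) < 8)); // misaligned pointer
  return 0;
}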
+ JumpTableStart = ConstantInt::get(Int64Ty, 0); + JumpTableMask = ConstantInt::get(Int64Ty, 0); + JumpTableSize = ConstantInt::get(Int64Ty, 0); + } else { + JumpTableStart = it->second.StartValue; + JumpTableMask = it->second.MaskValue; + JumpTableSize = it->second.Size; + } + + rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, + JumpTableSize); + } + + return; +} + +bool ForwardControlFlowIntegrity::runOnModule(Module &M) { + JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>(); + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + + // JumpInstrTableInfo stores information about the alignment of each entry. + // The alignment returned by JumpInstrTableInfo is alignment in bytes, not + // in the exponent. + ByteAlignment = JITI->entryByteAlignment(); + LogByteAlignment = llvm::Log2_64(ByteAlignment); + + // Set up tables for control-flow integrity based on information about the + // jump-instruction tables. + CFITables CFIT; + for (const auto &KV : JITI->getTables()) { + uint64_t Size = static_cast<uint64_t>(KV.second.size()); + uint64_t TableSize = NextPowerOf2(Size); + + int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; + Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); + Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); + + // The base of the table is defined to be the first jumptable function in + // the table. + Function *First = KV.second.begin()->second; + Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); + CFIT[KV.first].StartValue = JumpTableStartValue; + CFIT[KV.first].MaskValue = JumpTableMaskValue; + CFIT[KV.first].Size = JumpTableSize; + } + + if (CFIT.empty()) + return false; + + getIndirectCalls(M); + + if (!CFIEnforcing) { + addWarningFunction(M); + } + + // Update the instructions with the check and the indirect jump through our + // table. + updateIndirectCalls(M, CFIT); + + return true; +} + +void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { + PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); + + // Get the type of the Warning Function: void (i8*, i8*), + // where the first argument is the name of the function in which the violation + // occurs, and the second is the function pointer that violates CFI. + SmallVector<Type *, 2> WarningFunArgs; + WarningFunArgs.push_back(CharPtrTy); + WarningFunArgs.push_back(CharPtrTy); + FunctionType *WarningFunTy = + FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); + + if (!CFIFuncName.empty()) { + Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); + if (!FailureFun) + report_fatal_error("Could not get or insert the function specified by" + " -cfi-func-name"); + } else { + // The default warning function swallows the warning and lets the call + // continue, since there's no generic way for it to print out this + // information. 
+ Function *WarningFun = M.getFunction(cfi_failure_func_name); + if (!WarningFun) { + WarningFun = + Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, + cfi_failure_func_name, &M); + } + + BasicBlock *Entry = + BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); + ReturnInst::Create(M.getContext(), Entry); + } +} + +void ForwardControlFlowIntegrity::rewriteFunctionPointer( + Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, + Constant *JumpTableMask, Constant *JumpTableSize) { + IRBuilder<> TempBuilder(I); + + Type *OrigFunType = FunPtr->getType(); + + BasicBlock *CurBB = cast<BasicBlock>(I->getParent()); + Function *CurF = cast<Function>(CurBB->getParent()); + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + + Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); + Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); + + Value *NewFunPtr = nullptr; + Value *Check = nullptr; + switch (CFIType) { + case CFIntegrity::Sub: { + // This is the subtract, mask, and add version. + // Subtract from the base. + Value *Sub = TempBuilder.CreateSub(TI, TStartInt); + + // Mask the difference to force this to be a table offset. + Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); + + // Add it back to the base. + Value *Result = TempBuilder.CreateAdd(And, TStartInt); + + // Convert it back into a function pointer that we can call. + NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); + break; + } + case CFIntegrity::Ror: { + // This is the subtract and rotate version. + // Rotate right by the alignment value. The optimizer should recognize + // this sequence as a rotation. + + // This cast is safe, since unsigned is always a subset of uint64_t. + uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment); + Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); + Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); + + // Subtract from the base. + Value *Sub = TempBuilder.CreateSub(TI, TStartInt); + + // Create the equivalent of a rotate-right instruction. + Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); + Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); + Value *Or = TempBuilder.CreateOr(Shr, Shl); + + // Perform unsigned comparison to check for inclusion in the table. + Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); + NewFunPtr = FunPtr; + break; + } + case CFIntegrity::Add: { + // This is the mask and add version. + // Mask the function pointer to turn it into an offset into the table. + Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); + + // Then or this offset to the base and get the pointer value. + Value *Result = TempBuilder.CreateAdd(And, TStartInt); + + // Convert it back into a function pointer that we can call. + NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); + break; + } + } + + if (!CFIEnforcing) { + // If a check hasn't been added (in the rotation version), then check to see + // if it's the same as the original function. This check determines whether + // or not we call the CFI failure function. + if (!Check) + Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); + BasicBlock *InvalidPtrBlock = + BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); + BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); + + // Remove the unconditional branch that connects the two blocks. + TerminatorInst *TermInst = CurBB->getTerminator(); + TermInst->eraseFromParent(); + + // Add a conditional branch that depends on the Check above. 
+ BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); + + // Call the warning function for this pointer, then continue. + Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); + insertWarning(M, InvalidPtrBlock, BI, FunPtr); + } else { + // Modify the instruction to call this value. + CallSite CS(I); + CS.setCalledFunction(NewFunPtr); + } +} + +void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, + Instruction *I, Value *FunPtr) { + Function *ParentFun = cast<Function>(Block->getParent()); + + // Get the function to call right before the instruction. + Function *WarningFun = nullptr; + if (CFIFuncName.empty()) { + WarningFun = M.getFunction(cfi_failure_func_name); + } else { + WarningFun = M.getFunction(CFIFuncName); + } + + assert(WarningFun && "Could not find the CFI failure function"); + + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + + IRBuilder<> WarningInserter(I); + // Create a mergeable GlobalVariable containing the name of the function. + Value *ParentNameGV = + WarningInserter.CreateGlobalString(ParentFun->getName()); + Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); + Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); + WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); +} diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index c3e4f3ee2fb1..6101c679a5d3 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -71,9 +71,8 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, E = GCRegistry::end(); I != E; ++I) { if (Name == I->getName()) { std::unique_ptr<GCStrategy> S = I->instantiate(); - S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Name).setValue(S.get()); + StrategyMap[Name] = S.get(); StrategyList.push_back(std::move(S)); return StrategyList.back().get(); } @@ -92,12 +91,14 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC()); - GCFunctionInfo *GFI = S->insertFunctionInfo(F); + Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); + GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; } void GCModuleInfo::clear() { + Functions.clear(); FInfoMap.clear(); StrategyMap.clear(); StrategyList.clear(); diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index f80e9ced0bc9..fdff4a78e079 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -17,11 +17,3 @@ using namespace llvm; GCMetadataPrinter::GCMetadataPrinter() { } GCMetadataPrinter::~GCMetadataPrinter() { } - -void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) { - // Default is no action. -} - -void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - // Default is no action. 
-} diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 1fdff6bbf106..05c36fcb00d4 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -92,6 +93,7 @@ namespace { // ----------------------------------------------------------------------------- GCStrategy::GCStrategy() : + UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false), CustomWriteBarriers(false), @@ -101,25 +103,6 @@ GCStrategy::GCStrategy() : UsesMetadata(false) {} -bool GCStrategy::initializeCustomLowering(Module &M) { return false; } - -bool GCStrategy::performCustomLowering(Function &F) { - dbgs() << "gc " << getName() << " must override performCustomLowering.\n"; - llvm_unreachable("must override performCustomLowering"); -} - - -bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { - dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; - llvm_unreachable(nullptr); -} - - -GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { - Functions.push_back(make_unique<GCFunctionInfo>(F, *this)); - return Functions.back().get(); -} - // ----------------------------------------------------------------------------- INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", @@ -377,7 +360,7 @@ void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { } void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetFrameLowering *TFI = TM->getFrameLowering(); + const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); @@ -403,7 +386,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); MMI = &getAnalysis<MachineModuleInfo>(); - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); // Find the size of the stack frame. 
FI->setFrameSize(MF.getFrameInfo()->getStackSize()); diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 5572a062912f..82575671e681 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -54,6 +54,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -64,10 +65,10 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "global-merge" @@ -142,7 +143,7 @@ INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); // FIXME: Infer the maximum possible offset depending on the actual users @@ -281,7 +282,7 @@ bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, BSSGlobals; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1502d5f23efe..e84d25d96961 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,7 +31,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -161,6 +161,7 @@ namespace { const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; @@ -177,6 +178,7 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -269,15 +271,16 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - TLI = MF.getTarget().getTargetLowering(); - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TLI = MF.getSubtarget().getTargetLowering(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = 
&MF.getRegInfo(); const TargetSubtargetInfo &ST = MF.getTarget().getSubtarget<TargetSubtargetInfo>(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); + SchedModel.init(ST.getSchedModel(), &ST, TII); if (!TII) return false; @@ -286,9 +289,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false); - BFChange = BF.OptimizeFunction(MF, TII, - MF.getTarget().getRegisterInfo(), + BranchFolder BF(true, false, *MBFI, *MBPI); + BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -420,9 +422,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false); - BF.OptimizeFunction(MF, TII, - MF.getTarget().getRegisterInfo(), + BranchFolder BF(false, false, *MBFI, *MBPI); + BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -940,9 +941,8 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { /// to determine if it can be if-converted. If predecessor is already enqueued, /// dequeue it! void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; + for (const auto &Predecessor : BB->predecessors()) { + BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; if (PBBI.IsDone || PBBI.BB == BB) continue; PBBI.IsAnalyzed = false; @@ -1184,6 +1184,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { bool HasEarlyExit = CvtBBI->FalseBB != nullptr; uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; uint32_t WeightScale = 0; + if (HasEarlyExit) { // Get weights before modifying CvtBBI->BB and BBI.BB. 
CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); @@ -1192,6 +1193,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); } + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index f3c8d3dc7fba..df889f76412c 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -139,21 +138,16 @@ private: ~InlineSpiller() {} public: - InlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm) - : MF(mf), - LIS(pass.getAnalysis<LiveIntervals>()), - LSS(pass.getAnalysis<LiveStacks>()), - AA(&pass.getAnalysis<AliasAnalysis>()), - MDT(pass.getAnalysis<MachineDominatorTree>()), - Loops(pass.getAnalysis<MachineLoopInfo>()), - VRM(vrm), - MFI(*mf.getFrameInfo()), - MRI(mf.getRegInfo()), - TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()), - MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} + InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AliasAnalysis>()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), + MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + TII(*mf.getSubtarget().getInstrInfo()), + TRI(*mf.getSubtarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} void spill(LiveRangeEdit &) override; @@ -190,11 +184,16 @@ private: } namespace llvm { + +Spiller::~Spiller() { } +void Spiller::anchor() { } + Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { return new InlineSpiller(pass, mf, vrm); } + } //===----------------------------------------------------------------------===// @@ -509,6 +508,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; WorkList.push_back(std::make_pair(UseReg, UseVNI)); + LiveInterval &OrigLI = LIS.getInterval(Original); do { unsigned Reg; VNInfo *VNI; @@ -522,8 +522,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Trace through PHI-defs created by live range splitting. if (VNI->isPHIDef()) { - // Stop at original PHIs. We don't know the value at the predecessors. - if (VNI->def == OrigVNI->def) { + // Stop at original PHIs. We don't know the value at the + // predecessors. Look up the VNInfo for the current definition + // in OrigLI, to properly determine whether or not this phi was + // added by splitting. + if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) { DEBUG(dbgs() << "orig phi value\n"); SVI->second.DefByOrigPHI = true; SVI->second.AllDefsAreReloads = false; @@ -543,7 +546,6 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Separate all values dominated by OrigVNI into PHIs and non-PHIs. 
SmallVector<VNInfo*, 8> PHIs, NonPHIs; LiveInterval &LI = LIS.getInterval(Reg); - LiveInterval &OrigLI = LIS.getInterval(Original); for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); VI != VE; ++VI) { @@ -824,7 +826,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { WorkList.push_back(std::make_pair(LI, VNI)); do { std::tie(LI, VNI) = WorkList.pop_back_val(); - if (!UsedValues.insert(VNI)) + if (!UsedValues.insert(VNI).second) continue; if (VNI->isPHIDef()) { @@ -853,6 +855,15 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { + + // Analyze instruction + SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; + MIBundleOperands::VirtRegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + + if (!RI.Reads) + return false; + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); @@ -883,9 +894,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. - SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); @@ -939,10 +947,15 @@ void InlineSpiller::reMaterializeAll() { for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::use_bundle_nodbg_iterator - RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end(); - RI != E; ) { - MachineInstr *MI = &*(RI++); + for (MachineRegisterInfo::reg_bundle_iterator + RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); + RegI != E; ) { + MachineInstr *MI = &*(RegI++); + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) + continue; + anyRemat |= reMaterializeFor(LI, MI); } } @@ -1078,7 +1091,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; - bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::PATCHPOINT || MI->getOpcode() == TargetOpcode::STACKMAP); // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied @@ -1218,12 +1232,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(StackSlot) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); continue; } @@ -1363,7 +1381,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { StackInt = nullptr; DEBUG(dbgs() << "Inline spilling " - << MRI.getRegClass(edit.getReg())->getName() + << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) << ':' << edit.getParent() << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 91a1da94fbaf..1791afbe6c25 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_INTERFERENCECACHE -#define LLVM_CODEGEN_INTERFERENCECACHE +#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H +#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H #include "llvm/CodeGen/LiveIntervalUnion.h" diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index a8b860034f49..2c95e9e7d0d3 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -459,9 +459,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(CI->getOperand(0)); break; + case Intrinsic::assume: case Intrinsic::var_annotation: - break; // Strip out annotate intrinsic - + break; // Strip out these intrinsics + case Intrinsic::memcpy: { Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, @@ -527,6 +528,34 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); break; } + case Intrinsic::sin: { + ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl"); + break; + } + case Intrinsic::cos: { + ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl"); + break; + } + case Intrinsic::floor: { + ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl"); + break; + } + case Intrinsic::ceil: { + ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill"); + break; + } + case Intrinsic::trunc: { + ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl"); + break; + } + case Intrinsic::round: { + ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl"); + break; + } + case Intrinsic::copysign: { + ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); + break; + } case Intrinsic::flt_rounds: // Lower to "round to the nearest" if (!CI->getType()->isVoidTy()) diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp deleted file mode 100644 index 96a53892f6d3..000000000000 --- a/lib/CodeGen/JITCodeEmitter.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/JITCodeEmitter.h" - -using namespace llvm; - -void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp index 750f71f6022e..75fa26197398 100644 --- a/lib/CodeGen/JumpInstrTables.cpp +++ b/lib/CodeGen/JumpInstrTables.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "jt" #include "llvm/CodeGen/JumpInstrTables.h" - #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/Passes.h" @@ -30,7 +29,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include <vector> using namespace llvm; @@ -117,8 +115,8 @@ bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { if (!isa<GlobalAlias>(C)) C->replaceUsesOfWithOnConstant(GV, V, U); } else { - assert(false && "The Use of a Function symbol is neither an instruction nor" - " a constant"); + llvm_unreachable("The Use of a Function symbol is neither an instruction " + "nor a constant"); } return true; @@ -163,7 +161,7 @@ void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { FunctionType *OrigFunTy = Target->getFunctionType(); - FunctionType *FunTy = transformType(OrigFunTy); + FunctionType *FunTy = transformType(JTType, OrigFunTy); JumpMap::iterator it = Metadata.find(FunTy); if (Metadata.end() == it) { @@ -191,11 +189,12 @@ Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { } bool JumpInstrTables::hasTable(FunctionType *FunTy) { - FunctionType *TransTy = transformType(FunTy); + FunctionType *TransTy = transformType(JTType, FunTy); return Metadata.end() != Metadata.find(TransTy); } -FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { +FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT, + FunctionType *FunTy) { // Returning nullptr forces all types into the same table, since all types map // to the same type Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); @@ -211,7 +210,7 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); FunctionType *VoidFnTy = FunctionType::get( Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTType) { + switch (JTT) { case JumpTable::Single: return FunctionType::get(RetTy, EmptyParams, false); @@ -253,10 +252,10 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { bool JumpInstrTables::runOnModule(Module &M) { JITI = &getAnalysis<JumpInstrTableInfo>(); - // Get the set of jumptable-annotated functions. + // Get the set of jumptable-annotated functions that have their address taken. 
DenseMap<Function *, Function *> Functions; for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable)) { + if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { assert(F.hasUnnamedAddr() && "Attribute 'jumptable' requires 'unnamed_addr'"); Functions[&F] = nullptr; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index df96b945a8d9..9018314f2132 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,9 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" - +#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" #include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -48,8 +49,8 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), - TargetTriple); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( + *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. @@ -110,9 +111,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = - new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), - &TM->getTargetLowering()->getObjFileLowering()); + MachineModuleInfo *MMI = new MachineModuleInfo( + *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), + &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. @@ -143,8 +144,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AnalysisID StopAfter) { // Passes to handle jumptable function annotations. These can't be handled at // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass()); + PM.add(createJumpInstrTableInfoPass( + getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); PM.add(createJumpInstrTablesPass(Options.JTType)); + if (Options.FCFI) + PM.add(createForwardControlFlowIntegrityPass( + Options.JTType, Options.CFIType, Options.CFIEnforcing, + Options.getCFIFuncName())); // Add common CodeGen passes. 
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, @@ -165,10 +171,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getRegisterInfo(); - const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); + const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); std::unique_ptr<MCStreamer> AsmStreamer; switch (FileType) { @@ -201,9 +207,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (!MCE || !MAB) return true; - AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Context, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); + AsmStreamer.reset( + getTarget() + .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, + STI, Options.MCOptions.MCRelaxAll)); break; } case CGFT_Null: @@ -226,26 +233,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return false; } -/// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a JITCodeEmitter object to handle -/// actually outputting the machine code and resolving things like the address -/// of functions. This method should return true if machine code emission is -/// not supported. -/// -bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, - JITCodeEmitter &JCE, - bool DisableVerify) { - // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, - nullptr); - if (!Context) - return true; - - addCodeEmitter(PM, JCE); - - return false; // success! -} - /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -265,19 +252,20 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. If not, .o file // emission fails. - const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( + *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; std::unique_ptr<MCStreamer> AsmStreamer; - AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); + AsmStreamer.reset(getTarget() + .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB, + Out, MCE, STI, + Options.MCOptions.MCRelaxAll)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index d12c234bf3b2..b621e3baee51 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -137,6 +137,8 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { + if (DL.isUnknown()) + return nullptr; MDNode *Scope = nullptr; MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); @@ -172,9 +174,12 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { std::make_tuple(Parent, DIDescriptor(Scope), nullptr, false)).first; - if (!Parent && DIDescriptor(Scope).isSubprogram() && - DISubprogram(Scope).describes(MF->getFunction())) + if (!Parent) { + assert(DIDescriptor(Scope).isSubprogram()); + assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; + } return &I->second; } @@ -285,7 +290,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSet<const MachineBasicBlock *, 4> &MBBs) { + DebugLoc DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 388f58fde2ab..dc936a3f5b85 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -39,7 +39,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include "llvm/Target/TargetSubtargetInfo.h" #include <memory> using namespace llvm; @@ -109,7 +109,8 @@ public: namespace { class LDVImpl; class UserValue { - const MDNode *variable; ///< The debug info variable we are part of. + const MDNode *Variable; ///< The debug info variable we are part of. + const MDNode *Expression; ///< Any complex address expression. unsigned offset; ///< Byte offset into variable. bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is @@ -139,11 +140,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, - LocMap::Allocator &alloc) - : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), - next(nullptr), locInts(alloc) - {} + UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i, + DebugLoc L, LocMap::Allocator &alloc) + : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L), + leader(this), next(nullptr), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. UserValue *getLeader() { @@ -157,8 +157,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? 
- bool match(const MDNode *Var, unsigned Offset, bool indirect) const { - return Var == variable && Offset == offset && indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, + bool indirect) const { + return Var == Variable && Expr == Expression && Offset == offset && + indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -267,7 +269,7 @@ public: LiveIntervals &LIS, const TargetInstrInfo &TRI); /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A - /// variable may have more than one corresponding DBG_VALUE instructions. + /// variable may have more than one corresponding DBG_VALUE instructions. /// Only first one needs DebugLoc to identify variable's lexical scope /// in source file. DebugLoc findDebugLoc(); @@ -306,8 +308,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, - bool IsIndirect, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, const MDNode *Expr, + unsigned Offset, bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -329,12 +331,13 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), - ModifiedMF(false) {} + LDVImpl(LiveDebugVariables *ps) + : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); /// clear - Release all memory. void clear() { + MF = nullptr; userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -343,6 +346,7 @@ public: "Dbg values are not emitted in LDV"); EmitDone = false; ModifiedMF = false; + LS.reset(); } /// mapVirtReg - Map virtual register to an equivalence class. @@ -359,8 +363,8 @@ public: } // namespace void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { - DIVariable DV(variable); - OS << "!\""; + DIVariable DV(Variable); + OS << "!\""; DV.printExtendedName(OS); OS << "\"\t"; if (offset) @@ -420,19 +424,20 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { LDV->mapVirtReg(locations[i].getReg(), this); } -UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - bool IsIndirect, DebugLoc DL) { +UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, + unsigned Offset, bool IsIndirect, + DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Offset, IsIndirect)) + if (UV->match(Var, Expr, Offset, IsIndirect)) return UV; } userValues.push_back( - make_unique<UserValue>(Var, Offset, IsIndirect, DL, allocator)); + make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator)); UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; @@ -452,7 +457,7 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable - if (MI->getNumOperands() != 3 || + if (MI->getNumOperands() != 4 || !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); @@ -462,9 +467,11 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // Get or create the UserValue for (variable,offset). bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0;
-  const MDNode *Var = MI->getOperand(2).getMetadata();
+  const MDNode *Var = MI->getDebugVariable();
+  const MDNode *Expr = MI->getDebugExpression();
 
-  UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc());
+  UserValue *UV =
+      getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
   UV->addDef(Idx, MI->getOperand(0));
   return true;
 }
@@ -693,11 +700,11 @@ void LDVImpl::computeIntervals() {
 }
 
 bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+  clear();
   MF = &mf;
   LIS = &pass.getAnalysis<LiveIntervals>();
   MDT = &pass.getAnalysis<MachineDominatorTree>();
-  TRI = mf.getTarget().getRegisterInfo();
-  clear();
+  TRI = mf.getSubtarget().getRegisterInfo();
   LS.initialize(mf);
   DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
                << mf.getName() << " **********\n");
@@ -709,9 +716,25 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
   return Changed;
 }
 
+static void removeDebugValues(MachineFunction &mf) {
+  for (MachineBasicBlock &MBB : mf) {
+    for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
+      if (!MBBI->isDebugValue()) {
+        ++MBBI;
+        continue;
+      }
+      MBBI = MBB.erase(MBBI);
+    }
+  }
+}
+
 bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
   if (!EnableLDV)
     return false;
+  if (!FunctionDIs.count(mf.getFunction())) {
+    removeDebugValues(mf);
+    return false;
+  }
   if (!pImpl)
     pImpl = new LDVImpl(this);
   return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -933,10 +956,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
 
   if (Loc.isReg())
     BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
-            IsIndirect, Loc.getReg(), offset, variable);
+            IsIndirect, Loc.getReg(), offset, Variable, Expression);
   else
     BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
-      .addOperand(Loc).addImm(offset).addMetadata(variable);
+        .addOperand(Loc)
+        .addImm(offset)
+        .addMetadata(Variable)
+        .addMetadata(Expression);
 }
 
 void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
@@ -974,7 +1000,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
 
 void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
   DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
-  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  if (!MF)
+    return;
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
     DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
     userValues[i]->rewriteLocations(*VRM, *TRI);
@@ -988,6 +1016,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
     static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
 }
 
+bool LiveDebugVariables::doInitialization(Module &M) {
+  FunctionDIs = makeSubprogramMap(M);
+  return Pass::doInitialization(M);
+}
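The leader/next fields threaded through UserValue (see getUserValue and UserValue::merge above) form a small intrusive union-find: each value points at a class leader, getLeader() flattens chains, and merging redirects one leader at the other. A standalone sketch of just that pattern, with invented names and without the next-chain splicing the real class also performs:

#include <cassert>

struct Node {
  Node *leader = this; // a class leader points at itself

  // Follow leader pointers to the class representative, compressing the
  // path so the next query is a single hop (compare UserValue::getLeader).
  Node *getLeader() {
    Node *l = leader;
    while (l != l->leader)
      l = l->leader;
    for (Node *n = this; n != l;) {
      Node *next = n->leader;
      n->leader = l;
      n = next;
    }
    return l;
  }

  // Merge two equivalence classes by pointing one leader at the other.
  static Node *merge(Node *a, Node *b) {
    Node *la = a->getLeader();
    Node *lb = b->getLeader();
    if (lb != la)
      lb->leader = la;
    return la;
  }
};

int main() {
  Node a, b, c;
  Node::merge(&a, &b);
  Node::merge(&b, &c);
  assert(a.getLeader() == c.getLeader()); // all three share one leader
  return 0;
}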
 
 #ifndef NDEBUG
 void LiveDebugVariables::dump() {
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index bb6743547e3d..9748329314d6 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -18,11 +18,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
-#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DebugInfo.h"
 
 namespace llvm {
 
@@ -32,6 +33,7 @@ class VirtRegMap;
 class LiveDebugVariables : public MachineFunctionPass {
   void *pImpl;
+  DenseMap<const Function*, DISubprogram> FunctionDIs;
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -64,9 +66,10 @@ private:
   bool runOnMachineFunction(MachineFunction &) override;
   void releaseMemory() override;
   void getAnalysisUsage(AnalysisUsage &) const override;
+  bool doInitialization(Module &) override;
 };
 
 } // namespace llvm
 
-#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#endif
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ce8ce962417b..9423edc30910 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
@@ -185,6 +186,27 @@ bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const {
   return I != begin() && (--I)->end > Start;
 }
 
+bool LiveRange::covers(const LiveRange &Other) const {
+  if (empty())
+    return Other.empty();
+
+  const_iterator I = begin();
+  for (const Segment &O : Other.segments) {
+    I = advanceTo(I, O.start);
+    if (I == end() || I->start > O.start)
+      return false;
+
+    // Walk across adjacent live segments and see if we can get past O.end.
+    while (I->end < O.end) {
+      const_iterator Last = I;
+      // Get next segment and abort if it was not adjacent.
+      ++I;
+      if (I == end() || Last->end != I->start)
+        return false;
+    }
+  }
+  return true;
+}
 
 /// ValNo is dead, remove it. If it is the largest value number, just nuke it
 /// (and any other deleted values neighboring it), otherwise mark it as ~1U so
@@ -204,9 +226,9 @@ void LiveRange::markValNoForDeletion(VNInfo *ValNo) {
 void LiveRange::RenumberValues() {
   SmallPtrSet<VNInfo*, 8> Seen;
   valnos.clear();
-  for (const_iterator I = begin(), E = end(); I != E; ++I) {
-    VNInfo *VNI = I->valno;
-    if (!Seen.insert(VNI))
+  for (const Segment &S : segments) {
+    VNInfo *VNI = S.valno;
+    if (!Seen.insert(VNI).second)
       continue;
     assert(!VNI->isUnused() && "Unused valno used by live segment");
     VNI->id = (unsigned)valnos.size();
@@ -278,6 +300,12 @@ LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) {
   return MergeTo;
 }
 
+void LiveRange::append(const Segment S) {
+  // Check that the segment belongs to the back of the list.
+  assert(segments.empty() || segments.back().end <= S.start);
+  segments.push_back(S);
+}
+
 LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) {
   SlotIndex Start = S.start, End = S.end;
   iterator it = std::upper_bound(From, end(), Start);
@@ -461,8 +489,8 @@ void LiveRange::join(LiveRange &Other,
   // This can leave Other in an invalid state because we're not coalescing
   // touching segments that now have identical values. That's OK since Other is
   // not supposed to be valid after calling join();
-  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
-    I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]];
+  for (Segment &S : Other.segments)
+    S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]];
 
   // Update val# info. Renumber them and make sure they all belong to this
   // LiveRange now. Also remove dead val#'s.
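LiveRange::covers() above answers "does this range contain every segment of Other?", stitching across segments that touch end-to-start. A self-contained model of the same walk over plain integer intervals (names invented for the example):

#include <cassert>
#include <utility>
#include <vector>

using Seg = std::pair<int, int>; // half-open [start, end), sorted, disjoint

// True if every segment of Other lies inside the union of Range's segments,
// hopping across exactly adjacent segments like LiveRange::covers() does.
static bool covers(const std::vector<Seg> &Range,
                   const std::vector<Seg> &Other) {
  size_t i = 0;
  for (const Seg &o : Other) {
    while (i < Range.size() && Range[i].second <= o.first)
      ++i; // advance to the first segment ending after o.start
    if (i == Range.size() || Range[i].first > o.first)
      return false; // o.start is not covered
    for (size_t j = i; Range[j].second < o.second; ++j) {
      // Need the next segment to start exactly where this one ends.
      if (j + 1 == Range.size() || Range[j].second != Range[j + 1].first)
        return false;
    }
  }
  return true;
}

int main() {
  std::vector<Seg> Range = {{0, 4}, {4, 8}, {10, 12}};
  assert(covers(Range, {{1, 6}}));   // spans the two adjacent segments
  assert(!covers(Range, {{7, 11}})); // falls into the [8,10) gap
  assert(covers(Range, {}));         // the empty range is trivially covered
  return 0;
}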
@@ -482,8 +510,8 @@
   // Okay, now insert the RHS live segments into the LHS.
   LiveRangeUpdater Updater(this);
-  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
-    Updater.add(*I);
+  for (Segment &S : Other.segments)
+    Updater.add(S);
 }
 
 /// Merge all of the segments in RHS into this live range as the specified
@@ -493,8 +521,8 @@ void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS,
                                        VNInfo *LHSValNo) {
   LiveRangeUpdater Updater(this);
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
-    Updater.add(I->start, I->end, LHSValNo);
+  for (const Segment &S : RHS.segments)
+    Updater.add(S.start, S.end, LHSValNo);
 }
 
 /// MergeValueInAsValue - Merge all of the live segments of a specific val#
@@ -506,9 +534,9 @@ void LiveRange::MergeValueInAsValue(const LiveRange &RHS,
                                     const VNInfo *RHSValNo, VNInfo *LHSValNo) {
   LiveRangeUpdater Updater(this);
-  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I)
-    if (I->valno == RHSValNo)
-      Updater.add(I->start, I->end, LHSValNo);
+  for (const Segment &S : RHS.segments)
+    if (S.valno == RHSValNo)
+      Updater.add(S.start, S.end, LHSValNo);
 }
 
 /// MergeValueNumberInto - This method is called when two value numbers
@@ -570,10 +598,232 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
   return V2;
 }
 
+void LiveInterval::removeEmptySubRanges() {
+  SubRange **NextPtr = &SubRanges;
+  SubRange *I = *NextPtr;
+  while (I != nullptr) {
+    if (!I->empty()) {
+      NextPtr = &I->Next;
+      I = *NextPtr;
+      continue;
+    }
+    // Skip empty subranges until we find the first nonempty one.
+    do {
+      I = I->Next;
+    } while (I != nullptr && I->empty());
+    *NextPtr = I;
+  }
+}
+
+/// Helper function for constructMainRangeFromSubranges(): Search the CFG
+/// backwards until we find a place covered by a LiveRange segment that
+/// actually has a valno set.
+static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR,
+                            const MachineBasicBlock *MBB,
+                            SmallPtrSetImpl<const MachineBasicBlock*> &Visited) {
+  // We start the search at the end of MBB.
+  SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB);
+  // In our use case we can't leave the area covered by the live segments
+  // without finding an actual VNI def.
+  LiveRange::iterator I = LR.find(EndIdx.getPrevSlot());
+  assert(I != LR.end());
+  LiveRange::Segment &S = *I;
+  if (S.valno != nullptr)
+    return S.valno;
+
+  VNInfo *VNI = nullptr;
+  // Continue at predecessors (we could even go to idom with domtree available).
+  for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+    // Avoid going in circles.
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    VNI = searchForVNI(Indexes, LR, Pred, Visited);
+    if (VNI != nullptr) {
+      S.valno = VNI;
+      break;
+    }
+  }
+
+  return VNI;
+}
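Stripped of the SlotIndex and LiveRange machinery, searchForVNI() is a depth-first walk over predecessor edges with a visited set to cut cycles. A minimal standalone model, using integer block numbers and -1 for "no value known yet" (all names invented, and without the caching into S.valno the real helper performs):

#include <cassert>
#include <set>
#include <vector>

// Toy CFG: Preds[b] lists the predecessors of block b; ValueAtEnd[b] is the
// value number live out of b, or -1 if it is not known yet.
static int searchPreds(int Block, const std::vector<std::vector<int>> &Preds,
                       const std::vector<int> &ValueAtEnd,
                       std::set<int> &Visited) {
  if (ValueAtEnd[Block] != -1)
    return ValueAtEnd[Block];
  for (int Pred : Preds[Block]) {
    if (!Visited.insert(Pred).second)
      continue; // already seen: avoid cycling through loops
    int VNI = searchPreds(Pred, Preds, ValueAtEnd, Visited);
    if (VNI != -1)
      return VNI;
  }
  return -1;
}

int main() {
  // 0 -> 1 -> 2 -> 3, with a back edge 3 -> 1; only block 0 defines a value.
  std::vector<std::vector<int>> Preds = {{}, {0, 3}, {1}, {2}};
  std::vector<int> ValueAtEnd = {7, -1, -1, -1};
  std::set<int> Visited;
  assert(searchPreds(3, Preds, ValueAtEnd, Visited) == 7);
  return 0;
}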
+
+static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
+  SmallPtrSet<const MachineBasicBlock*, 5> Visited;
+  for (LiveRange::Segment &S : LI.segments) {
+    if (S.valno != nullptr)
+      continue;
+    // This can only happen at the beginning of a basic block.
+    assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+    Visited.clear();
+    const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+    for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+      VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+      if (VNI != nullptr) {
+        S.valno = VNI;
+        break;
+      }
+    }
+    assert(S.valno != nullptr && "could not determine valno");
+  }
+}
+
+void LiveInterval::constructMainRangeFromSubranges(
+    const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) {
+  // The basic observations on which this algorithm is based:
+  // - Each Def/ValNo in a subrange must have a corresponding def on the main
+  //   range, but no further defs/valnos are necessary.
+  // - If any of the subranges is live at a point the main live range has to be
+  //   live too, conversely if no subrange is live the main range mustn't be
+  //   live either.
+  // We do this by scanning through all the subranges simultaneously, creating
+  // new segments in the main range as segment starts/ends come up in the
+  // subranges.
+  assert(hasSubRanges() && "expected subranges to be present");
+  assert(segments.empty() && valnos.empty() && "expected empty main range");
+
+  // Collect subrange, iterator pairs for the walk and determine first and last
+  // SlotIndex involved.
+  SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs;
+  SlotIndex First;
+  SlotIndex Last;
+  for (const SubRange &SR : subranges()) {
+    if (SR.empty())
+      continue;
+    SRs.push_back(std::make_pair(&SR, SR.begin()));
+    if (!First.isValid() || SR.segments.front().start < First)
+      First = SR.segments.front().start;
+    if (!Last.isValid() || SR.segments.back().end > Last)
+      Last = SR.segments.back().end;
+  }
+
+  // Walk over all subranges simultaneously.
+  Segment CurrentSegment;
+  bool ConstructingSegment = false;
+  bool NeedVNIFixup = false;
+  unsigned ActiveMask = 0;
+  SlotIndex Pos = First;
+  while (true) {
+    SlotIndex NextPos = Last;
+    enum {
+      NOTHING,
+      BEGIN_SEGMENT,
+      END_SEGMENT,
+    } Event = NOTHING;
+    // Which subregister lanes are affected by the current event.
+    unsigned EventMask = 0;
+    // Whether a BEGIN_SEGMENT is also a valno definition point.
+    bool IsDef = false;
+    // Find the next begin or end of a subrange segment. Combine masks if we
+    // have multiple begins/ends at the same position. Ends take precedence
+    // over Begins.
+    for (auto &SRP : SRs) {
+      const SubRange &SR = *SRP.first;
+      const_iterator &I = SRP.second;
+      // Advance iterator of subrange to a segment involving Pos; the earlier
+      // segments are already merged at this point.
+      while (I != SR.end() &&
+             (I->end < Pos ||
+              (I->end == Pos && (ActiveMask & SR.LaneMask) == 0)))
+        ++I;
+      if (I == SR.end())
+        continue;
+      if ((ActiveMask & SR.LaneMask) == 0 &&
+          Pos <= I->start && I->start <= NextPos) {
+        // Merge multiple begins at the same position.
+        if (I->start == NextPos && Event == BEGIN_SEGMENT) {
+          EventMask |= SR.LaneMask;
+          IsDef |= I->valno->def == I->start;
+        } else if (I->start < NextPos || Event != END_SEGMENT) {
+          Event = BEGIN_SEGMENT;
+          NextPos = I->start;
+          EventMask = SR.LaneMask;
+          IsDef = I->valno->def == I->start;
+        }
+      }
+      if ((ActiveMask & SR.LaneMask) != 0 &&
+          Pos <= I->end && I->end <= NextPos) {
+        // Merge multiple ends at the same position.
+        if (I->end == NextPos && Event == END_SEGMENT)
+          EventMask |= SR.LaneMask;
+        else {
+          Event = END_SEGMENT;
+          NextPos = I->end;
+          EventMask = SR.LaneMask;
+        }
+      }
+    }
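The effect of this event scan, minus the valno bookkeeping, is an interval union: the main range must be live wherever at least one lane's subrange is live. A compact standalone model of that merge, using an event map over plain integer positions instead of parallel iterators (all names invented):

#include <cassert>
#include <map>
#include <utility>
#include <vector>

using Seg = std::pair<int, int>; // half-open [start, end)

// Merge several per-lane segment lists into one "main range": a position is
// covered iff at least one lane is live there.
static std::vector<Seg> mergeLanes(const std::vector<std::vector<Seg>> &Lanes) {
  std::map<int, int> Delta; // position -> change in number of live lanes
  for (const auto &Lane : Lanes)
    for (const Seg &S : Lane) {
      ++Delta[S.first];
      --Delta[S.second];
    }
  std::vector<Seg> Out;
  int Active = 0, Start = 0;
  for (const auto &[Pos, D] : Delta) {
    if (Active == 0 && Active + D > 0)
      Start = Pos; // some lane went live: a merged segment begins
    if (Active > 0 && Active + D == 0)
      Out.push_back({Start, Pos}); // all lanes dead again: close the segment
    Active += D;
  }
  return Out;
}

int main() {
  // Lane A live on [0,4) and [10,12), lane B on [2,6).
  std::vector<Seg> Merged = mergeLanes({{{0, 4}, {10, 12}}, {{2, 6}}});
  assert((Merged == std::vector<Seg>{{0, 6}, {10, 12}}));
  return 0;
}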
+    // Advance scan position.
+    Pos = NextPos;
+    if (Event == BEGIN_SEGMENT) {
+      if (ConstructingSegment && IsDef) {
+        // Finish previous segment because we have to start a new one.
+        CurrentSegment.end = Pos;
+        append(CurrentSegment);
+        ConstructingSegment = false;
+      }
+
+      // Start a new segment if necessary.
+      if (!ConstructingSegment) {
+        // Determine value number for the segment.
+        VNInfo *VNI = nullptr;
+        if (IsDef) {
+          VNI = getNextValue(Pos, VNIAllocator);
+        } else {
+          // We have to reuse an existing value number. If we are lucky, we
+          // already passed one of the predecessor blocks and determined its
+          // value number (with blocks in reverse postorder this would always
+          // be true, but we have no such guarantee).
+          assert(Pos.isBlock());
+          const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos);
+          // See if any of the predecessor blocks has a lower number and a VNI.
+          for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+            SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
+            VNI = getVNInfoBefore(PredEnd);
+            if (VNI != nullptr)
+              break;
+          }
+          // Def will come later: We have to do an extra fixup pass.
+          if (VNI == nullptr)
+            NeedVNIFixup = true;
+        }
+
+        CurrentSegment.start = Pos;
+        CurrentSegment.valno = VNI;
+        ConstructingSegment = true;
+      }
+      ActiveMask |= EventMask;
+    } else if (Event == END_SEGMENT) {
+      assert(ConstructingSegment);
+      // Finish segment if no lane is active anymore.
+      ActiveMask &= ~EventMask;
+      if (ActiveMask == 0) {
+        CurrentSegment.end = Pos;
+        append(CurrentSegment);
+        ConstructingSegment = false;
+      }
+    } else {
+      // We reached the end of the last subrange and can stop.
+      assert(Event == NOTHING);
+      break;
+    }
+  }
+
+  // We might not be able to assign new valnos for all segments if the basic
+  // block containing the definition comes after a segment using the valno.
+  // Do a fixup pass for this uncommon case.
+  if (NeedVNIFixup)
+    determineMissingVNIs(Indexes, *this);
+
+  assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended");
+  verify();
+}
+
 unsigned LiveInterval::getSize() const {
   unsigned Sum = 0;
-  for (const_iterator I = begin(), E = end(); I != E; ++I)
-    Sum += I->start.distance(I->end);
+  for (const Segment &S : segments)
+    Sum += S.start.distance(S.end);
   return Sum;
 }

@@ -591,9 +841,9 @@ void LiveRange::print(raw_ostream &OS) const {
   if (empty())
     OS << "EMPTY";
   else {
-    for (const_iterator I = begin(), E = end(); I != E; ++I) {
-      OS << *I;
-      assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+    for (const Segment &S : segments) {
+      OS << S;
+      assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo");
     }
   }

@@ -620,6 +870,10 @@ void LiveRange::print(raw_ostream &OS) const {
 void LiveInterval::print(raw_ostream &OS) const {
   OS << PrintReg(reg) << ' ';
   super::print(OS);
+  // Print subranges.
+  for (const SubRange &SR : subranges()) {
+    OS << format(" L%04X ", SR.LaneMask) << SR;
+  }
 }

 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -648,6 +902,26 @@ void LiveRange::verify() const {
     }
   }
 }
+
+void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
+  super::verify();
+
+  // Make sure SubRanges are fine and LaneMasks are disjoint.
+  unsigned Mask = 0;
+  unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+  for (const SubRange &SR : subranges()) {
+    // Subrange lanemask should be disjoint from any previous subrange masks.
+    assert((Mask & SR.LaneMask) == 0);
+    Mask |= SR.LaneMask;
+
+    // The subrange mask should be contained in the maximum lane mask for the
+    // vreg.
+    assert((Mask & ~MaxMask) == 0);
+
+    SR.verify();
+    // Main live range should cover subrange.
+    assert(covers(SR));
+  }
+}
 #endif

@@ -692,14 +966,14 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
   OS << " updater with gap = " << (ReadI - WriteI)
      << ", last start = " << LastStart
      << ":\n  Area 1:";
-  for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I)
-    OS << ' ' << *I;
+  for (const auto &S : make_range(LR->begin(), WriteI))
+    OS << ' ' << S;
   OS << "\n  Spills:";
   for (unsigned I = 0, E = Spills.size(); I != E; ++I)
     OS << ' ' << Spills[I];
   OS << "\n  Area 2:";
-  for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I)
-    OS << ' ' << *I;
+  for (const auto &S : make_range(ReadI, LR->end()))
+    OS << ' ' << S;
   OS << '\n';
 }

@@ -860,9 +1134,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
   const VNInfo *used = nullptr, *unused = nullptr;

   // Determine connections.
-  for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
-       I != E; ++I) {
-    const VNInfo *VNI = *I;
+  for (const VNInfo *VNI : LI->valnos) {
     // Group all unused values into one class.
     if (VNI->isUnused()) {
       if (unused)
@@ -938,6 +1210,8 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
     } else
       *J++ = *I;
   }
+  // TODO: do not cheat anymore by simply clearing all subranges.
+  LI.clearSubRanges();
   LI.segments.erase(J, E);

   // Transfer VNInfos to their new owners and renumber them.
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 15595609e904..56f38b6407d9 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -32,10 +32,11 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <algorithm>
 #include <cmath>
 #include <limits>
@@ -62,6 +63,10 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
 static bool EnablePrecomputePhysRegs = false;
 #endif // NDEBUG

+static cl::opt<bool> EnableSubRegLiveness(
+  "enable-subreg-liveness", cl::Hidden, cl::init(true),
+  cl::desc("Enable subregister liveness tracking."));
+
 void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addRequired<AliasAnalysis>();
@@ -110,12 +115,15 @@ void LiveIntervals::releaseMemory() {
 bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
   MF = &fn;
   MRI = &MF->getRegInfo();
-  TM = &fn.getTarget();
-  TRI = TM->getRegisterInfo();
-  TII = TM->getInstrInfo();
+  TRI = MF->getSubtarget().getRegisterInfo();
+  TII = MF->getSubtarget().getInstrInfo();
   AA = &getAnalysis<AliasAnalysis>();
   Indexes = &getAnalysis<SlotIndexes>();
   DomTree = &getAnalysis<MachineDominatorTree>();
+
+  if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
+    MRI->enableSubRegLiveness(true);
+
   if (!LRCalc)
     LRCalc = new LiveRangeCalc();

@@ -184,9 +192,8 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
   assert(LRCalc && "LRCalc not initialized.");
   assert(LI.empty() && "Should only compute empty intervals.");
   LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
-  LRCalc->createDeadDefs(LI);
-  LRCalc->extendToUses(LI);
-  computeDeadValues(&LI, LI, nullptr, nullptr);
+  LRCalc->calculate(LI);
+  computeDeadValues(LI, nullptr);
 }

 void
LiveIntervals::computeVirtRegs() { @@ -313,6 +320,70 @@ void LiveIntervals::computeLiveInRegUnits() { } +static void createSegmentsForValues(LiveRange &LR, + iterator_range<LiveInterval::vni_iterator> VNIs) { + for (auto VNI : VNIs) { + if (VNI->isUnused()) + continue; + SlotIndex Def = VNI->def; + LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI)); + } +} + +typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList; + +static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, + ShrinkToUsesWorkList &WorkList, + const LiveRange &OldRange) { + // Keep track of the PHIs that are in use. + SmallPtrSet<VNInfo*, 8> UsedPHIs; + // Blocks that have already been added to WorkList as live-out. + SmallPtrSet<MachineBasicBlock*, 16> LiveOut; + + // Extend intervals to reach all uses in WorkList. + while (!WorkList.empty()) { + SlotIndex Idx = WorkList.back().first; + VNInfo *VNI = WorkList.back().second; + WorkList.pop_back(); + const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot()); + SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB); + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) { + assert(ExtVNI == VNI && "Unexpected existing value number"); + (void)ExtVNI; + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || + !UsedPHIs.insert(VNI).second) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (auto &Pred : MBB->predecessors()) { + if (!LiveOut.insert(Pred).second) + continue; + SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + // A predecessor is not required to have a live-out value for a PHI. + if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop)) + WorkList.push_back(std::make_pair(Stop, PVNI)); + } + continue; + } + + // VNI is live-in to MBB. + DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); + + // Make sure VNI is live-out from the predecessors. + for (auto &Pred : MBB->predecessors()) { + if (!LiveOut.insert(Pred).second) + continue; + SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + assert(OldRange.getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); + WorkList.push_back(std::make_pair(Stop, VNI)); + } + } +} + /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. @@ -321,11 +392,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can only shrink virtual registers"); - // Find all the values used, including PHI kills. - SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList; - // Blocks that have already been added to WorkList as live-out. - SmallPtrSet<MachineBasicBlock*, 16> LiveOut; + // Shrink subregister live ranges. + for (LiveInterval::SubRange &S : li->subranges()) { + shrinkToUses(S, li->reg); + } + + // Find all the values used, including PHI kills. + ShrinkToUsesWorkList WorkList; // Visit all instructions reading li->reg. for (MachineRegisterInfo::reg_instr_iterator @@ -356,102 +430,115 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Create new live ranges with only minimal live segments per def. 
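+  // (Sketch of the two phases, added commentary with assumed indexes: a value
+  // defined at 16r and used at 48r first gets the minimal segment [16r,16d)
+  // from createSegmentsForValues(); extendSegmentsToUses() then grows it to
+  // [16r,48r) by walking the use worklist back towards the def.)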
LiveRange NewLR; - for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); - I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->isUnused()) - continue; - NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); - } - - // Keep track of the PHIs that are in use. - SmallPtrSet<VNInfo*, 8> UsedPHIs; - - // Extend intervals to reach all uses in WorkList. - while (!WorkList.empty()) { - SlotIndex Idx = WorkList.back().first; - VNInfo *VNI = WorkList.back().second; - WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); - SlotIndex BlockStart = getMBBStartIdx(MBB); - - // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { - (void)ExtVNI; - assert(ExtVNI == VNI && "Unexpected existing value number"); - // Is this a PHIDef we haven't seen before? - if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) - continue; - // The PHI is live, make sure the predecessors are live-out. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - if (!LiveOut.insert(*PI)) - continue; - SlotIndex Stop = getMBBEndIdx(*PI); - // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) - WorkList.push_back(std::make_pair(Stop, PVNI)); - } - continue; - } - - // VNI is live-in to MBB. - DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); - - // Make sure VNI is live-out from the predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - if (!LiveOut.insert(*PI)) - continue; - SlotIndex Stop = getMBBEndIdx(*PI); - assert(li->getVNInfoBefore(Stop) == VNI && - "Wrong value out of predecessor"); - WorkList.push_back(std::make_pair(Stop, VNI)); - } - } - - // Handle dead values. - bool CanSeparate = false; - computeDeadValues(li, NewLR, &CanSeparate, dead); + createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); + extendSegmentsToUses(NewLR, *Indexes, WorkList, *li); // Move the trimmed segments back. li->segments.swap(NewLR.segments); + + // Handle dead values. + bool CanSeparate = computeDeadValues(*li, dead); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } -void LiveIntervals::computeDeadValues(LiveInterval *li, - LiveRange &LR, - bool *CanSeparate, +bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl<MachineInstr*> *dead) { - for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); - I != E; ++I) { - VNInfo *VNI = *I; + bool PHIRemoved = false; + for (auto VNI : LI.valnos) { if (VNI->isUnused()) continue; - LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def); - assert(LRI != LR.end() && "Missing segment for PHI"); - if (LRI->end != VNI->def.getDeadSlot()) + LiveRange::iterator I = LI.FindSegmentContaining(VNI->def); + assert(I != LI.end() && "Missing segment for VNI"); + if (I->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - LR.removeSegment(LRI->start, LRI->end); + LI.removeSegment(I); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); - if (CanSeparate) - *CanSeparate = true; + PHIRemoved = true; } else { // This is a dead def. Make sure the instruction knows. 
 MachineInstr *MI = getInstructionFromIndex(VNI->def);
       assert(MI && "No instruction defining live value");
-      MI->addRegisterDead(li->reg, TRI);
+      MI->addRegisterDead(LI.reg, TRI);
       if (dead && MI->allDefsAreDead()) {
         DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI);
         dead->push_back(MI);
       }
     }
   }
+  return PHIRemoved;
+}
+
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
+  DEBUG(dbgs() << "Shrink: " << SR << '\n');
+  assert(TargetRegisterInfo::isVirtualRegister(Reg)
+         && "Can only shrink virtual registers");
+  // Find all the values used, including PHI kills.
+  ShrinkToUsesWorkList WorkList;
+
+  // Visit all instructions reading Reg.
+  SlotIndex LastIdx;
+  for (MachineOperand &MO : MRI->reg_operands(Reg)) {
+    MachineInstr *UseMI = MO.getParent();
+    if (UseMI->isDebugValue())
+      continue;
+    // Maybe the operand is for a subregister we don't care about.
+    unsigned SubReg = MO.getSubReg();
+    if (SubReg != 0) {
+      unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg);
+      if ((SubRegMask & SR.LaneMask) == 0)
+        continue;
+    }
+    // We only need to visit each instruction once.
+    SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+    if (Idx == LastIdx)
+      continue;
+    LastIdx = Idx;
+
+    LiveQueryResult LRQ = SR.Query(Idx);
+    VNInfo *VNI = LRQ.valueIn();
+    // For subranges it is possible that only undef values are left in that
+    // part of the subregister, so there is no real live range at the use.
+    if (!VNI)
+      continue;
+
+    // Special case: An early-clobber tied operand reads and writes the
+    // register one slot early.
+    if (VNInfo *DefVNI = LRQ.valueDefined())
+      Idx = DefVNI->def;
+
+    WorkList.push_back(std::make_pair(Idx, VNI));
+  }
+
+  // Create a new live range with only minimal live segments per def.
+  LiveRange NewLR;
+  createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+  extendSegmentsToUses(NewLR, *Indexes, WorkList, SR);
+
+  // Move the trimmed ranges back.
+  SR.segments.swap(NewLR.segments);
+
+  // Remove dead PHI value numbers.
+  for (auto VNI : SR.valnos) {
+    if (VNI->isUnused())
+      continue;
+    const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
+    assert(Segment != nullptr && "Missing segment for VNI");
+    if (Segment->end != VNI->def.getDeadSlot())
+      continue;
+    if (VNI->isPHIDef()) {
+      // This is a dead PHI. Remove it.
+      VNI->markUnused();
+      SR.removeSegment(*Segment);
+      DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
+    }
+  }
+
+  DEBUG(dbgs() << "Shrunk: " << SR << '\n');
 }

 void LiveIntervals::extendToIndices(LiveRange &LR,
@@ -462,26 +549,25 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
     LRCalc->extend(LR, Indices[i]);
 }

-void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
+void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
                                SmallVectorImpl<SlotIndex> *EndPoints) {
-  LiveQueryResult LRQ = LI->Query(Kill);
-  VNInfo *VNI = LRQ.valueOut();
+  LiveQueryResult LRQ = LR.Query(Kill);
+  VNInfo *VNI = LRQ.valueOutOrDead();
   if (!VNI)
     return;

   MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill);
-  SlotIndex MBBStart, MBBEnd;
-  std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(KillMBB);
+  SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB);

   // If VNI isn't live out from KillMBB, the value is trivially pruned.
   if (LRQ.endPoint() < MBBEnd) {
-    LI->removeSegment(Kill, LRQ.endPoint());
+    LR.removeSegment(Kill, LRQ.endPoint());
     if (EndPoints) EndPoints->push_back(LRQ.endPoint());
     return;
   }

   // VNI is live out of KillMBB.
- LI->removeSegment(Kill, MBBEnd); + LR.removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -498,8 +584,9 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, MachineBasicBlock *MBB = *I; // Check if VNI is live in to MBB. + SlotIndex MBBStart, MBBEnd; std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveQueryResult LRQ = LI->Query(MBBStart); + LiveQueryResult LRQ = LR.Query(MBBStart); if (LRQ.valueIn() != VNI) { // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); @@ -508,34 +595,46 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeSegment(MBBStart, LRQ.endPoint()); + LR.removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. - LI->removeSegment(MBBStart, MBBEnd); + LR.removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } } } +void LiveIntervals::pruneValue(LiveInterval &LI, SlotIndex Kill, + SmallVectorImpl<SlotIndex> *EndPoints) { + pruneValue((LiveRange&)LI, Kill, EndPoints); + + for (LiveInterval::SubRange &SR : LI.subranges()) { + pruneValue(SR, Kill, nullptr); + } +} + //===----------------------------------------------------------------------===// // Register allocator hooks. // void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. - SmallVector<std::pair<LiveRange*, LiveRange::iterator>, 8> RU; + SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU; + // Keep track of subregister ranges. + SmallVector<std::pair<const LiveInterval::SubRange*, + LiveRange::const_iterator>, 4> SRs; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = &getInterval(Reg); - if (LI->empty()) + const LiveInterval &LI = getInterval(Reg); + if (LI.empty()) continue; // Find the regunit intervals for the assigned register. They may overlap @@ -543,15 +642,22 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { - LiveRange &RURanges = getRegUnit(*Units); - if (RURanges.empty()) + const LiveRange &RURange = getRegUnit(*Units); + if (RURange.empty()) continue; - RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); + RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); + } + + if (MRI->tracksSubRegLiveness()) { + SRs.clear(); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); + } } // Every instruction that kills Reg corresponds to a segment range end // point. - for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; + for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE; ++RI) { // A block index indicates an MBB edge. if (RI->end.isBlock()) @@ -568,23 +674,80 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // BAR %EAX<kill> // // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. 
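+      // (Added commentary: the loop below advances one cached iterator per
+      // regunit range; if any regunit live range is still live at the point
+      // where this vreg's segment ends, the physical register survives the
+      // kill point and the kill flag must be suppressed, exactly as in the
+      // FOO/BAR example above.)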
-      bool CancelKill = false;
-      for (unsigned u = 0, e = RU.size(); u != e; ++u) {
-        LiveRange &RRanges = *RU[u].first;
-        LiveRange::iterator &I = RU[u].second;
-        if (I == RRanges.end())
+      for (auto &RUP : RU) {
+        const LiveRange &RURange = *RUP.first;
+        LiveRange::const_iterator &I = RUP.second;
+        if (I == RURange.end())
           continue;
-        I = RRanges.advanceTo(I, RI->end);
-        if (I == RRanges.end() || I->start >= RI->end)
+        I = RURange.advanceTo(I, RI->end);
+        if (I == RURange.end() || I->start >= RI->end)
           continue;
         // I is overlapping RI.
-        CancelKill = true;
-        break;
+        goto CancelKill;
+      }
+
+      if (MRI->tracksSubRegLiveness()) {
+        // When reading a partially undefined value we must not add a kill
+        // flag. The regalloc might have used the undef lane for something
+        // else. Example:
+        //     %vreg1 = ...                  ; R32: %vreg1
+        //     %vreg2:high16 = ...           ; R64: %vreg2
+        //        = read %vreg2<kill>        ; R64: %vreg2
+        //        = read %vreg1              ; R32: %vreg1
+        // The <kill> flag is correct for %vreg2, but the register allocator
+        // may assign R0L to %vreg1, and R0 to %vreg2 because the low 32 bits
+        // of R0 are actually never written by %vreg2. After assignment the
+        // <kill> flag at the read instruction is invalid.
+        unsigned DefinedLanesMask;
+        if (!SRs.empty()) {
+          // Compute a mask of lanes that are defined.
+          DefinedLanesMask = 0;
+          for (auto &SRP : SRs) {
+            const LiveInterval::SubRange &SR = *SRP.first;
+            LiveRange::const_iterator &I = SRP.second;
+            if (I == SR.end())
+              continue;
+            I = SR.advanceTo(I, RI->end);
+            if (I == SR.end() || I->start >= RI->end)
+              continue;
+            // I is overlapping RI.
+            DefinedLanesMask |= SR.LaneMask;
+          }
+        } else
+          DefinedLanesMask = ~0u;
+
+        bool IsFullWrite = false;
+        for (const MachineOperand &MO : MI->operands()) {
+          if (!MO.isReg() || MO.getReg() != Reg)
+            continue;
+          if (MO.isUse()) {
+            // Reading any undefined lanes?
+            unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
+            if ((UseMask & ~DefinedLanesMask) != 0)
+              goto CancelKill;
+          } else if (MO.getSubReg() == 0) {
+            // Writing to the full register?
+            assert(MO.isDef());
+            IsFullWrite = true;
+          }
+        }
+
+        // If an instruction writes to a subregister, a new segment starts in
+        // the LiveInterval. But as this only overrides part of the register,
+        // adding kill flags is not correct here after registers have been
+        // assigned.
+        if (!IsFullWrite) {
+          // Next segment has to be adjacent in the subregister write case.
+ LiveRange::const_iterator N = std::next(RI); + if (N != LI.end() && N->start == RI->end) + goto CancelKill; + } } - if (CancelKill) - MI->clearRegisterKills(Reg, nullptr); - else - MI->addRegisterKilled(Reg, nullptr); + + MI->addRegisterKilled(Reg, nullptr); + continue; +CancelKill: + MI->clearRegisterKills(Reg, nullptr); } } } @@ -615,9 +778,7 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { bool LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I != E; ++I) { - const VNInfo *PHI = *I; + for (const VNInfo *PHI : LI.valnos) { if (PHI->isUnused() || !PHI->isPHIDef()) continue; const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); @@ -767,7 +928,16 @@ public: continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { LiveInterval &LI = LIS.getInterval(Reg); - updateRange(LI, Reg); + if (LI.hasSubRanges()) { + unsigned SubReg = MO->getSubReg(); + unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LaneMask) == 0) + continue; + updateRange(S, Reg, S.LaneMask); + } + } + updateRange(LI, Reg, 0); continue; } @@ -775,7 +945,7 @@ public: // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) if (LiveRange *LR = getRegUnitLI(*Units)) - updateRange(*LR, *Units); + updateRange(*LR, *Units, 0); } if (hasRegMask) updateRegMaskSlots(); @@ -784,21 +954,24 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveRange &LR, unsigned Reg) { - if (!Updated.insert(&LR)) + void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + if (!Updated.insert(&LR).second) return; DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); - else + if (LaneMask != 0) + dbgs() << format(" L%04X", LaneMask); + } else { dbgs() << PrintRegUnit(Reg, &TRI); + } dbgs() << ":\t" << LR << '\n'; }); if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) handleMoveDown(LR); else - handleMoveUp(LR, Reg); + handleMoveUp(LR, Reg, LaneMask); DEBUG(dbgs() << " -->\t" << LR << '\n'); LR.verify(); } @@ -911,7 +1084,7 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveRange &LR, unsigned Reg) { + void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) { // First look for a kill at OldIdx. LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); @@ -932,7 +1105,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - std::prev(I)->end = findLastUseBefore(Reg).getRegSlot(); + std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot(); return; } } @@ -988,15 +1161,17 @@ private: } // Return the last use of reg between NewIdx and OldIdx. 
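+  // (Added commentary on the LaneMask filter below, assumed masks: when a
+  // subrange with mask 0001 is updated, a use operand that reads only lane
+  // 0002 of the register is skipped and cannot become the last use; operands
+  // without a subregister index always participate.)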
- SlotIndex findLastUseBefore(unsigned Reg) { + SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI.use_instr_nodbg_begin(Reg), - UE = MRI.use_instr_nodbg_end(); - UI != UE; ++UI) { - const MachineInstr* MI = &*UI; + for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0 && LaneMask != 0 + && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0) + continue; + + const MachineInstr *MI = MO.getParent(); SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); if (InstSlot > LastUse && InstSlot < OldIdx) LastUse = InstSlot; @@ -1062,6 +1237,94 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, HME.updateAllRanges(MI); } +void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, + const MachineBasicBlock::iterator End, + const SlotIndex endIdx, + LiveRange &LR, const unsigned Reg, + const unsigned LaneMask) { + LiveInterval::iterator LII = LR.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LR.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + unsigned SubReg = MO.getSubReg(); + unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg); + if ((Mask & LaneMask) == 0) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LR.begin()) + prevStart = std::prev(LII)->start; + + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LR.removeSegment(*LII, true); + if (prevStart.isValid()) + LII = LR.find(prevStart); + else + LII = LR.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LR.addSegment(S); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LR.addSegment(S); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. 
+ if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } +} + void LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, MachineBasicBlock::iterator Begin, @@ -1107,83 +1370,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!LI.hasAtLeastOneValue()) continue; - LiveInterval::iterator LII = LI.find(endIdx); - SlotIndex lastUseIdx; - if (LII != LI.end() && LII->start < endIdx) - lastUseIdx = LII->end; - else - --LII; - - for (MachineBasicBlock::iterator I = End; I != Begin;) { - --I; - MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - - SlotIndex instrIdx = getInstructionIndex(MI); - bool isStartValid = getInstructionFromIndex(LII->start); - bool isEndValid = getInstructionFromIndex(LII->end); - - // FIXME: This doesn't currently handle early-clobber or multiple removed - // defs inside of the region to repair. - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - const MachineOperand &MO = *OI; - if (!MO.isReg() || MO.getReg() != Reg) - continue; - - if (MO.isDef()) { - if (!isStartValid) { - if (LII->end.isDead()) { - SlotIndex prevStart; - if (LII != LI.begin()) - prevStart = std::prev(LII)->start; - - // FIXME: This could be more efficient if there was a - // removeSegment method that returned an iterator. - LI.removeSegment(*LII, true); - if (prevStart.isValid()) - LII = LI.find(prevStart); - else - LII = LI.begin(); - } else { - LII->start = instrIdx.getRegSlot(); - LII->valno->def = instrIdx.getRegSlot(); - if (MO.getSubReg() && !MO.isUndef()) - lastUseIdx = instrIdx.getRegSlot(); - else - lastUseIdx = SlotIndex(); - continue; - } - } - - if (!lastUseIdx.isValid()) { - VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), - VNInfoAllocator); - LiveRange::Segment S(instrIdx.getRegSlot(), - instrIdx.getDeadSlot(), VNI); - LII = LI.addSegment(S); - } else if (LII->start != instrIdx.getRegSlot()) { - VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), - VNInfoAllocator); - LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addSegment(S); - } - - if (MO.getSubReg() && !MO.isUndef()) - lastUseIdx = instrIdx.getRegSlot(); - else - lastUseIdx = SlotIndex(); - } else if (MO.isUse()) { - // FIXME: This should probably be handled outside of this branch, - // either as part of the def case (for defs inside of the region) or - // after the loop over the region. - if (!isEndValid && !LII->end.isBlock()) - LII->end = instrIdx.getRegSlot(); - if (!lastUseIdx.isValid()) - lastUseIdx = instrIdx.getRegSlot(); - } - } + for (LiveInterval::SubRange &S : LI.subranges()) { + repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask); } + repairOldRegInRange(Begin, End, endIdx, LI, Reg); } } diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index d81221b6a526..025d99ce7881 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -26,14 +26,14 @@ using namespace llvm; // Merge a LiveInterval's segments. Guarantee no overlaps. -void LiveIntervalUnion::unify(LiveInterval &VirtReg) { - if (VirtReg.empty()) +void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) return; ++Tag; // Insert each of the virtual register's live segments into the map. 
- LiveInterval::iterator RegPos = VirtReg.begin(); - LiveInterval::iterator RegEnd = VirtReg.end(); + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); SegmentIter SegPos = Segments.find(RegPos->start); while (SegPos.valid()) { @@ -53,14 +53,14 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { } // Remove a live virtual register's segments from this union. -void LiveIntervalUnion::extract(LiveInterval &VirtReg) { - if (VirtReg.empty()) +void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) return; ++Tag; // Remove each of the virtual register's live segments from the map. - LiveInterval::iterator RegPos = VirtReg.begin(); - LiveInterval::iterator RegEnd = VirtReg.end(); + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); SegmentIter SegPos = Segments.find(RegPos->start); for (;;) { @@ -70,7 +70,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg) { return; // Skip all segments that may have been coalesced. - RegPos = VirtReg.advanceTo(RegPos, SegPos.start()); + RegPos = Range.advanceTo(RegPos, SegPos.start()); if (RegPos == RegEnd) return; diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index a558e142e013..1d46161ad711 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -19,6 +19,13 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" +void LiveRangeCalc::resetLiveOutMap() { + unsigned NumBlocks = MF->getNumBlockIDs(); + Seen.clear(); + Seen.resize(NumBlocks); + Map.resize(NumBlocks); +} + void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, @@ -28,104 +35,188 @@ void LiveRangeCalc::reset(const MachineFunction *mf, Indexes = SI; DomTree = MDT; Alloc = VNIA; - - unsigned N = MF->getNumBlockIDs(); - Seen.clear(); - Seen.resize(N); - LiveOut.resize(N); + resetLiveOutMap(); LiveIn.clear(); } -void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { - assert(MRI && Indexes && "call reset() first"); - - // Visit all def operands. If the same instruction has multiple defs of Reg, - // LR.createDeadDef() will deduplicate. - for (MachineOperand &MO : MRI->def_operands(Reg)) { +static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, + LiveRange &LR, const MachineOperand &MO) { const MachineInstr *MI = MO.getParent(); - // Find the corresponding slot index. - SlotIndex Idx; + SlotIndex DefIdx; if (MI->isPHI()) - // PHI defs begin at the basic block start index. - Idx = Indexes->getMBBStartIdx(MI->getParent()); + DefIdx = Indexes.getMBBStartIdx(MI->getParent()); else - // Instructions are either normal 'r', or early clobber 'e'. - Idx = Indexes->getInstructionIndex(MI) - .getRegSlot(MO.isEarlyClobber()); + DefIdx = Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); // Create the def in LR. This may find an existing def. - LR.createDeadDef(Idx, *Alloc); + LR.createDeadDef(DefIdx, Alloc); +} + +void LiveRangeCalc::calculate(LiveInterval &LI) { + assert(MRI && Indexes && "call reset() first"); + + // Step 1: Create minimal live segments for every definition of Reg. + // Visit all def operands. If the same instruction has multiple defs of Reg, + // createDeadDef() will deduplicate. 
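+  // (Example under assumed lane masks, added commentary: if an existing
+  // subrange has mask 0003 and the def writes only lane 0001, the loop below
+  // splits it into a 0001 range, which receives the dead def, and a 0002
+  // range that keeps the previous liveness untouched.)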
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+  unsigned Reg = LI.reg;
+  for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+    if (!MO.isDef() && !MO.readsReg())
+      continue;
+
+    unsigned SubReg = MO.getSubReg();
+    if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+      unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+                                  : MRI->getMaxLaneMaskForVReg(Reg);
+
+      // If this is the first time we see a subregister def, initialize
+      // subranges by creating a copy of the main range.
+      if (!LI.hasSubRanges() && !LI.empty()) {
+        unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg);
+        LI.createSubRangeFrom(*Alloc, ClassMask, LI);
+      }
+
+      for (LiveInterval::SubRange &S : LI.subranges()) {
+        // A Mask for subregs common to the existing subrange and the current
+        // def.
+        unsigned Common = S.LaneMask & Mask;
+        if (Common == 0)
+          continue;
+        // A Mask for subregs covered by the subrange but not the current def.
+        unsigned LRest = S.LaneMask & ~Mask;
+        LiveInterval::SubRange *CommonRange;
+        if (LRest != 0) {
+          // Split current subrange into Common and LRest ranges.
+          S.LaneMask = LRest;
+          CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
+        } else {
+          assert(Common == S.LaneMask);
+          CommonRange = &S;
+        }
+        if (MO.isDef())
+          createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
+        Mask &= ~Common;
+      }
+      // Create a new SubRange for subregs we did not cover yet.
+      if (Mask != 0) {
+        LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
+        if (MO.isDef())
+          createDeadDef(*Indexes, *Alloc, *NewRange, MO);
+      }
+    }
+
+    // Create the def in the main live range. We do not have to do this if
+    // subranges are tracked, as we recreate the main range later in this
+    // case.
+    if (MO.isDef() && !LI.hasSubRanges())
+      createDeadDef(*Indexes, *Alloc, LI, MO);
+  }
+
+  // We may have created empty live ranges for partially undefined uses; we
+  // cannot keep them because we will not find defs in them later.
+  LI.removeEmptySubRanges();
+
+  // Step 2: Extend live segments to all uses, constructing SSA form as
+  // necessary.
+  if (LI.hasSubRanges()) {
+    for (LiveInterval::SubRange &S : LI.subranges()) {
+      resetLiveOutMap();
+      extendToUses(S, Reg, S.LaneMask);
+    }
+    LI.clear();
+    LI.constructMainRangeFromSubranges(*Indexes, *Alloc);
+  } else {
+    resetLiveOutMap();
+    extendToUses(LI, Reg, ~0u);
+  }
 }

-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) {
+void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
   assert(MRI && Indexes && "call reset() first");

+  // Visit all def operands. If the same instruction has multiple defs of Reg,
+  // LR.createDeadDef() will deduplicate.
+  for (MachineOperand &MO : MRI->def_operands(Reg))
+    createDeadDef(*Indexes, *Alloc, LR, MO);
+}
+
+
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) {
   // Visit all operands that read Reg. This may include partial defs.
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
   for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
     // Clear all kill flags. They will be reinserted after register allocation
     // by LiveIntervalAnalysis::addKillFlags().
     if (MO.isUse())
       MO.setIsKill(false);
+    else {
+      // We only care about uses, but on the main range (mask ~0u) this
+      // includes the "virtual" reads happening for subregister defs.
+      if (Mask != ~0u)
+        continue;
+    }
+
     if (!MO.readsReg())
       continue;
-    // MI is reading Reg. We may have visited MI before if it happens to be
-    // reading Reg multiple times. That is OK, extend() is idempotent.
+ unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + // Ignore uses not covering the current subrange. + if ((SubRegMask & Mask) == 0) + continue; + } + + // Determine the actual place of the use. const MachineInstr *MI = MO.getParent(); unsigned OpNo = (&MO - &MI->getOperand(0)); - - // Find the SlotIndex being read. - SlotIndex Idx; + SlotIndex UseIdx; if (MI->isPHI()) { assert(!MO.isDef() && "Cannot handle PHI def of partial register."); - // PHI operands are paired: (Reg, PredMBB). - // Extend the live range to be live-out from PredMBB. - Idx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); + // The actual place where a phi operand is used is the end of the pred + // MBB. PHI operands are paired: (Reg, PredMBB). + UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); } else { - // This is a normal instruction. - Idx = Indexes->getInstructionIndex(MI).getRegSlot(); // Check for early-clobber redefs. + bool isEarlyClobber = false; unsigned DefIdx; - if (MO.isDef()) { - if (MO.isEarlyClobber()) - Idx = Idx.getRegSlot(true); - } else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { + if (MO.isDef()) + isEarlyClobber = MO.isEarlyClobber(); + else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { // FIXME: This would be a lot easier if tied early-clobber uses also // had an early-clobber flag. - if (MI->getOperand(DefIdx).isEarlyClobber()) - Idx = Idx.getRegSlot(true); + isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber(); } + UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber); } - extend(LR, Idx, Reg); + + // MI is reading Reg. We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + extend(LR, UseIdx, Reg); } } -// Transfer information from the LiveIn vector to the live ranges. -void LiveRangeCalc::updateLiveIns() { +void LiveRangeCalc::updateFromLiveIns() { LiveRangeUpdater Updater; - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), - E = LiveIn.end(); I != E; ++I) { - if (!I->DomNode) + for (const LiveInBlock &I : LiveIn) { + if (!I.DomNode) continue; - MachineBasicBlock *MBB = I->DomNode->getBlock(); - assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I.DomNode->getBlock(); + assert(I.Value && "No live-in value found"); SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(MBB); - if (I->Kill.isValid()) + if (I.Kill.isValid()) // Value is killed inside this block. - End = I->Kill; + End = I.Kill; else { // The value is live-through, update LiveOut as well. // Defer the Domtree lookup until it is needed. assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr); + Map[MBB] = LiveOutPair(I.Value, nullptr); } - Updater.setDest(&I->LR); - Updater.add(Start, End, I->Value); + Updater.setDest(&I.LR); + Updater.add(Start, End, I.Value); } LiveIn.clear(); } @@ -162,7 +253,7 @@ void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); updateSSA(); - updateLiveIns(); + updateFromLiveIns(); } @@ -202,7 +293,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, // Is this a known live-out block? 
if (Seen.test(Pred->getNumber())) { - if (VNInfo *VNI = LiveOut[Pred].first) { + if (VNInfo *VNI = Map[Pred].first) { if (TheVNI && TheVNI != VNI) UniqueVNI = false; TheVNI = VNI; @@ -251,8 +342,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, if (*I == KillMBBNum && Kill.isValid()) End = Kill; else - LiveOut[MF->getBlockNumbered(*I)] = - LiveOutPair(TheVNI, nullptr); + Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); } return true; @@ -285,9 +375,8 @@ void LiveRangeCalc::updateSSA() { Changes = 0; // Propagate live-out values down the dominator tree, inserting phi-defs // when necessary. - for (SmallVectorImpl<LiveInBlock>::iterator I = LiveIn.begin(), - E = LiveIn.end(); I != E; ++I) { - MachineDomTreeNode *Node = I->DomNode; + for (LiveInBlock &I : LiveIn) { + MachineDomTreeNode *Node = I.DomNode; // Skip block if the live-in value has already been determined. if (!Node) continue; @@ -303,16 +392,16 @@ void LiveRangeCalc::updateSSA() { // immediate dominator. Check if any of them have live-out values that are // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { - IDomValue = LiveOut[IDom->getBlock()]; + IDomValue = Map[IDom->getBlock()]; // Cache the DomTree node that defined the value. if (IDomValue.first && !IDomValue.second) - LiveOut[IDom->getBlock()].second = IDomValue.second = + Map[IDom->getBlock()].second = IDomValue.second = DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - LiveOutPair &Value = LiveOut[*PI]; + LiveOutPair &Value = Map[*PI]; if (!Value.first || Value.first == IDomValue.first) continue; @@ -334,7 +423,7 @@ void LiveRangeCalc::updateSSA() { // The value may be live-through even if Kill is set, as can happen when // we are called from extendRange. In that case LiveOutSeen is true, and // LiveOut indicates a foreign or missing value. - LiveOutPair &LOP = LiveOut[MBB]; + LiveOutPair &LOP = Map[MBB]; // Create a phi-def if required. if (needPHI) { @@ -342,25 +431,25 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; std::tie(Start, End) = Indexes->getMBBRange(MBB); - LiveRange &LR = I->LR; + LiveRange &LR = I.LR; VNInfo *VNI = LR.getNextValue(Start, *Alloc); - I->Value = VNI; + I.Value = VNI; // This block is done, we know the final value. - I->DomNode = nullptr; + I.DomNode = nullptr; - // Add liveness since updateLiveIns now skips this node. - if (I->Kill.isValid()) - LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); + // Add liveness since updateFromLiveIns now skips this node. + if (I.Kill.isValid()) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); else { LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { // No phi-def here. Remember incoming value. - I->Value = IDomValue.first; + I.Value = IDomValue.first; // If the IDomValue is killed in the block, don't propagate through. 
- if (I->Kill.isValid()) + if (I.Kill.isValid()) continue; // Propagate IDomValue if it isn't killed: diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 67ab5596d299..1b9099b5fc7e 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVERANGECALC_H -#define LLVM_CODEGEN_LIVERANGECALC_H +#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H +#define LLVM_LIB_CODEGEN_LIVERANGECALC_H #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" @@ -40,12 +40,6 @@ class LiveRangeCalc { MachineDominatorTree *DomTree; VNInfo::Allocator *Alloc; - /// Seen - Bit vector of active entries in LiveOut, also used as a visited - /// set by findReachingDefs. One entry per basic block, indexed by block - /// number. This is kept as a separate bit vector because it can be cleared - /// quickly when switching live ranges. - BitVector Seen; - /// LiveOutPair - A value and the block that defined it. The domtree node is /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; @@ -53,8 +47,14 @@ class LiveRangeCalc { /// LiveOutMap - Map basic blocks to the value leaving the block. typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; - /// LiveOut - Map each basic block where a live range is live out to the - /// live-out value and its defining block. + /// Bit vector of active entries in LiveOut, also used as a visited set by + /// findReachingDefs. One entry per basic block, indexed by block number. + /// This is kept as a separate bit vector because it can be cleared quickly + /// when switching live ranges. + BitVector Seen; + + /// Map each basic block where a live range is live out to the live-out value + /// and its defining block. /// /// For every basic block, MBB, one of these conditions shall be true: /// @@ -70,7 +70,7 @@ class LiveRangeCalc { /// /// The map can be shared by multiple live ranges as long as no two are /// live-out of the same block. - LiveOutMap LiveOut; + LiveOutMap Map; /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. @@ -121,8 +121,18 @@ class LiveRangeCalc { /// blocks. No values are read from the live ranges. void updateSSA(); - /// Add liveness as specified in the LiveIn vector. - void updateLiveIns(); + /// Transfer information from the LiveIn vector to the live ranges and update + /// the given @p LiveOuts. + void updateFromLiveIns(); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask); + + /// Reset Map and Seen fields. + void resetLiveOutMap(); public: LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), @@ -167,22 +177,19 @@ public: /// minimal live range. void createDeadDefs(LiveRange &LR, unsigned Reg); - /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval &LI) { - createDeadDefs(LI, LI.reg); - } - - /// extendToUses - Extend the live range of LI to reach all uses of Reg. + /// Extend the live range of @p LR to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. 
- void extendToUses(LiveRange &LR, unsigned Reg); - - /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval &LI) { - extendToUses(LI, LI.reg); + void extendToUses(LiveRange &LR, unsigned PhysReg) { + extendToUses(LR, PhysReg, ~0u); } + /// Calculates liveness for the register specified in live interval @p LI. + /// Creates subregister live ranges as needed if subreg liveness tracking is + /// enabled. + void calculate(LiveInterval &LI); + //===--------------------------------------------------------------------===// // Low-level interface. //===--------------------------------------------------------------------===// @@ -204,7 +211,7 @@ public: /// addLiveInBlock(). void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { Seen.set(MBB->getNumber()); - LiveOut[MBB] = LiveOutPair(VNI, nullptr); + Map[MBB] = LiveOutPair(VNI, nullptr); } /// addLiveInBlock - Add a block with an unknown live-in value. This diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 431241fbfb63..47da205df694 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -60,9 +60,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, } void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { - for (LiveInterval::vni_iterator I = getParent().vni_begin(), - E = getParent().vni_end(); I != E; ++I) { - VNInfo *VNI = *I; + for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); @@ -135,7 +133,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, } // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove()) + if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI)) return false; // Verify that all used registers are available with the same values. @@ -286,8 +284,16 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { if (TheDelegate) TheDelegate->LRE_WillShrinkVirtReg(LI.reg); LI.removeValNo(VNI); - if (LI.empty()) + if (LI.empty()) { RegsToErase.push_back(Reg); + } else { + // Also remove the value in subranges. 
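+      // (Added commentary: a value removed from the main range at Idx must
+      // also disappear from every subrange with a VNInfo at Idx; otherwise
+      // the subranges would claim lanes live past the main range and violate
+      // the covers() invariant checked in LiveInterval::verify().)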
+ for (LiveInterval::SubRange &S : LI.subranges()) { + if (VNInfo *SVNI = S.getVNInfoAt(Idx)) + S.removeValNo(SVNI); + } + LI.removeEmptySubRanges(); + } } } } @@ -411,8 +417,11 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) - DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " - << MRI.getRegClass(LI.reg)->getName() << '\n'); + DEBUG({ + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; + }); VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index de2ce22f7fbf..154ce6fc122b 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -18,8 +18,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -48,7 +48,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { } bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { - TRI = MF.getTarget().getRegisterInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); VRM = &getAnalysis<VirtRegMap>(); @@ -72,16 +72,44 @@ void LiveRegMatrix::releaseMemory() { } } +template<typename Callable> +bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, + unsigned PhysReg, Callable Func) { + if (VRegInterval.hasSubRanges()) { + for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + unsigned Unit = (*Units).first; + unsigned Mask = (*Units).second; + for (LiveInterval::SubRange &S : VRegInterval.subranges()) { + if (S.LaneMask & Mask) { + if (Func(Unit, S)) + return true; + break; + } + } + } + } else { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (Func(*Units, VRegInterval)) + return true; + } + } + return false; +} + void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); MRI->setPhysRegUsed(PhysReg); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); - Matrix[*Units].unify(VirtReg); - } + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range); + Matrix[Unit].unify(VirtReg, Range); + return false; + }); + ++NumAssigned; DEBUG(dbgs() << '\n'); } @@ -91,10 +119,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) << " from " << PrintReg(PhysReg, TRI) << ':'); VRM->clearVirt(VirtReg.reg); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - DEBUG(dbgs() << ' ' << PrintRegUnit(*Units, TRI)); - Matrix[*Units].extract(VirtReg); - } + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI)); + Matrix[Unit].extract(VirtReg, 
Range); + return false; + }); + ++NumUnassigned; DEBUG(dbgs() << '\n'); } @@ -122,12 +154,13 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - const LiveRange &UnitRange = LIS->getRegUnit(*Units); - if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) - return true; - } - return false; + + bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + const LiveRange &UnitRange = LIS->getRegUnit(Unit); + return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes()); + }); + return Result; } LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index b3161a459707..8a6ac251ab2d 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <limits> using namespace llvm; @@ -49,7 +50,7 @@ void LiveStacks::releaseMemory() { } bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { - TRI = MF.getTarget().getRegisterInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); // FIXME: No analysis is being done right now. We are relying on the // register allocators to provide the information. return false; @@ -80,7 +81,7 @@ void LiveStacks::print(raw_ostream &OS, const Module*) const { int Slot = I->first; const TargetRegisterClass *RC = getIntervalRegClass(Slot); if (RC) - OS << " [" << RC->getName() << "]\n"; + OS << " [" << TRI->getRegClassName(RC) << "]\n"; else OS << " [Unknown]\n"; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 758b2163ff4f..c4bca5f3a11e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> using namespace llvm; @@ -497,17 +496,135 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, } } +void LiveVariables::runOnInstr(MachineInstr *MI, + SmallVectorImpl<unsigned> &Defs) { + assert(!MI->isDebugValue()); + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->isPHI()) + NumOperandsToProcess = 1; + + // Clear kill and dead markers. LV will recompute them. + SmallVector<unsigned, 4> UseRegs; + SmallVector<unsigned, 4> DefRegs; + SmallVector<unsigned, 1> RegMasks; + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + MO.setIsKill(false); + if (MO.readsReg()) + UseRegs.push_back(MOReg); + } else /*MO.isDef()*/ { + MO.setIsDead(false); + DefRegs.push_back(MOReg); + } + } + + MachineBasicBlock *MBB = MI->getParent(); + // Process all uses. 
+ for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { + unsigned MOReg = UseRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegUse(MOReg, MBB, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegUse(MOReg, MI); + } + + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + + // Process all defs. + for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { + unsigned MOReg = DefRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegDef(MOReg, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegDef(MOReg, MI, Defs); + } + UpdatePhysRegDefs(MI, Defs); +} + +void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { + // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(TargetRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, nullptr, Defs); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + runOnInstr(MI, Defs); + } + + // Handle any virtual assignments from PHI nodes which might be at the + // bottom of this basic block. We check all of our successor blocks to see + // if they have PHI nodes, and if so, we simulate an assignment at the end + // of the current block. + if (!PHIVarInfo[MBB->getNumber()].empty()) { + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; + + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), + E = VarInfoVec.end(); I != E; ++I) + // Mark it alive only in the block we are representing. + MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), + MBB); + } + + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet<unsigned, 4> LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + + // Loop over PhysRegDef / PhysRegUse, killing any registers that are + // available at the end of the basic block. 
+ for (unsigned i = 0; i != NumRegs; ++i) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) + HandlePhysRegDef(i, nullptr, Defs); +} + bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); - TRI = MF->getTarget().getRegisterInfo(); - - unsigned NumRegs = TRI->getNumRegs(); - PhysRegDef = new MachineInstr*[NumRegs]; - PhysRegUse = new MachineInstr*[NumRegs]; - PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; - std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); - std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); + TRI = MF->getSubtarget().getRegisterInfo(); + + const unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); + PHIVarInfo.resize(MF->getNumBlockIDs()); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on @@ -525,124 +642,11 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *Entry = MF->begin(); SmallPtrSet<MachineBasicBlock*,16> Visited; - for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MachineBasicBlock *MBB = *DFI; - - // Mark live-in registers as live-in. - SmallVector<unsigned, 4> Defs; - for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), - EE = MBB->livein_end(); II != EE; ++II) { - assert(TargetRegisterInfo::isPhysicalRegister(*II) && - "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, nullptr, Defs); - } - - // Loop over all of the instructions, processing them. - DistanceMap.clear(); - unsigned Dist = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - DistanceMap.insert(std::make_pair(MI, Dist++)); - - // Process all of the operands of the instruction... - unsigned NumOperandsToProcess = MI->getNumOperands(); - - // Unless it is a PHI node. In this case, ONLY process the DEF, not any - // of the uses. They will be handled in other basic blocks. - if (MI->isPHI()) - NumOperandsToProcess = 1; - - // Clear kill and dead markers. LV will recompute them. - SmallVector<unsigned, 4> UseRegs; - SmallVector<unsigned, 4> DefRegs; - SmallVector<unsigned, 1> RegMasks; - for (unsigned i = 0; i != NumOperandsToProcess; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) { - RegMasks.push_back(i); - continue; - } - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned MOReg = MO.getReg(); - if (MO.isUse()) { - MO.setIsKill(false); - if (MO.readsReg()) - UseRegs.push_back(MOReg); - } else /*MO.isDef()*/ { - MO.setIsDead(false); - DefRegs.push_back(MOReg); - } - } - - // Process all uses. - for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { - unsigned MOReg = UseRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) - HandleVirtRegUse(MOReg, MBB, MI); - else if (!MRI->isReserved(MOReg)) - HandlePhysRegUse(MOReg, MI); - } - - // Process all masked registers. (Call clobbers). - for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) - HandleRegMask(MI->getOperand(RegMasks[i])); - - // Process all defs. 
- for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { - unsigned MOReg = DefRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) - HandleVirtRegDef(MOReg, MI); - else if (!MRI->isReserved(MOReg)) - HandlePhysRegDef(MOReg, MI, Defs); - } - UpdatePhysRegDefs(MI, Defs); - } - - // Handle any virtual assignments from PHI nodes which might be at the - // bottom of this basic block. We check all of our successor blocks to see - // if they have PHI nodes, and if so, we simulate an assignment at the end - // of the current block. - if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - - for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), - E = VarInfoVec.end(); I != E; ++I) - // Mark it alive only in the block we are representing. - MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), - MBB); - } - - // MachineCSE may CSE instructions which write to non-allocatable physical - // registers across MBBs. Remember if any reserved register is liveout. - SmallSet<unsigned, 4> LiveOuts; - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->isLandingPad()) - continue; - for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), - LE = SuccMBB->livein_end(); LI != LE; ++LI) { - unsigned LReg = *LI; - if (!TRI->isInAllocatableClass(LReg)) - // Ignore other live-ins, e.g. those that are live into landing pads. - LiveOuts.insert(LReg); - } - } - - // Loop over PhysRegDef / PhysRegUse, killing any registers that are - // available at the end of the basic block. - for (unsigned i = 0; i != NumRegs; ++i) - if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) - HandlePhysRegDef(i, nullptr, Defs); + for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { + runOnBlock(MBB, NumRegs); - std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); - std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); } // Convert and transfer the dead / killed information we have gathered into @@ -664,9 +668,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { assert(Visited.count(&*i) != 0 && "unreachable basic block found"); #endif - delete[] PhysRegDef; - delete[] PhysRegUse; - delete[] PHIVarInfo; + PhysRegDef.clear(); + PhysRegUse.clear(); + PHIVarInfo.clear(); return false; } diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 36885e8890d5..e8bf687a626e 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -102,7 +103,7 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned LocalObjectCount = MFI->getObjectIndexEnd(); // If the target doesn't want/need this pass, or if there are no locals @@ -183,7 +184,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs void 
LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; @@ -272,8 +273,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { bool UsedBaseReg = false; MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -290,6 +291,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Debug value, stackmap and patchpoint instructions can't be out of // range, so they don't need any updates. if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STATEPOINT || MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT) continue; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 08fef5ffaf44..3c7390527e5f 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/LeakDetector.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" @@ -32,6 +31,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -44,7 +44,6 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) } MachineBasicBlock::~MachineBasicBlock() { - LeakDetector::removeGarbageObject(this); } /// getSymbol - Return the MCSymbol for this basic block. @@ -53,8 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const TargetMachine &TM = MF->getTarget(); - const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -85,14 +83,11 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { for (MachineBasicBlock::instr_iterator I = N->instr_begin(), E = N->instr_end(); I != E; ++I) I->AddRegOperandsToUseLists(RegInfo); - - LeakDetector::removeGarbageObject(N); } void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; - LeakDetector::addGarbageObject(N); } @@ -107,8 +102,6 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { // use/def lists. 
MachineFunction *MF = Parent->getParent(); N->AddRegOperandsToUseLists(MF->getRegInfo()); - - LeakDetector::removeGarbageObject(N); } /// removeNodeFromList (MI) - When we remove an instruction from a basic block @@ -122,8 +115,6 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(nullptr); - - LeakDetector::addGarbageObject(N); } /// transferNodesFromList (MI) - When moving a range of instructions from one @@ -290,7 +281,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; @@ -359,7 +350,7 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { bool LiveIn = isLiveIn(PhysReg); iterator I = SkipPHIsAndLabels(begin()), E = end(); MachineRegisterInfo &MRI = getParent()->getRegInfo(); - const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo(); // Look for an existing copy. if (LiveIn) @@ -390,7 +381,7 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { } void MachineBasicBlock::updateTerminator() { - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); // A block with no successors has no concerns with fall-through edges. if (this->succ_empty()) return; @@ -645,7 +636,7 @@ bool MachineBasicBlock::canFallThrough() { // Analyze the branches, if any, at the end of the block. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough @@ -690,7 +681,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. If this uses a jump table, we won't touch it. - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) @@ -795,7 +786,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl); + MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, + dl); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); @@ -823,7 +815,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addLiveIn(*I); // Update LiveVariables. - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); if (LV) { // Restore kills of virtual registers that were killed by the terminators. 
while (!KilledRegs.empty()) { @@ -903,31 +895,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } if (MachineDominatorTree *MDT = - P->getAnalysisIfAvailable<MachineDominatorTree>()) { - // Update dominator information. - MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ); - - bool IsNewIDom = true; - for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end(); - PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; - if (PredBB == NMBB) - continue; - if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) { - IsNewIDom = false; - break; - } - } - - // We know "this" dominates the newly created basic block. - MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this); - - // If all the other predecessors of "Succ" are dominated by "Succ" itself - // then the new block is the new immediate dominator of "Succ". Otherwise, - // the new block doesn't dominate anything. - if (IsNewIDom) - MDT->changeImmediateDominator(SucccDTNode, NewDTNode); - } + P->getAnalysisIfAvailable<MachineDominatorTree>()) + MDT->recordSplitCriticalEdge(this, Succ, NMBB); if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>()) if (MachineLoop *TIL = MLI->getLoopFor(this)) { @@ -1086,7 +1055,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock::succ_iterator SI = succ_begin(); while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; - if (!SeenMBBs.insert(MBB) || + if (!SeenMBBs.insert(MBB).second || (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { // This is a superfluous edge, remove it. SI = removeSuccessor(SI); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 74af1e2d64b0..aaa7d9156976 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -812,7 +813,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BE = L.block_end(); BI != BE; ++BI) { BlockChain &Chain = *BlockToChain[*BI]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -913,7 +914,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; BlockChain &Chain = *BlockToChain[BB]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -1045,9 +1046,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (F.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return; // Don't care about loop alignment. if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. @@ -1065,6 +1063,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + unsigned Align = TLI->getPrefLoopAlignment(L); + if (!Align) + continue; // Don't care about loop alignment. + // If the block is cold relative to the function entry don't waste space // aligning it. 
BlockFrequency Freq = MBFI->getBlockFreq(*BI); @@ -1111,8 +1113,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MLI = &getAnalysis<MachineLoopInfo>(); - TII = F.getTarget().getInstrInfo(); - TLI = F.getTarget().getTargetLowering(); + TII = F.getSubtarget().getInstrInfo(); + TLI = F.getSubtarget().getTargetLowering(); assert(BlockToChain.empty()); buildCFGChains(F); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 7da439caded4..29604089ad80 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-cse" @@ -78,7 +79,8 @@ namespace { SmallVector<MachineInstr*, 64> Exps; unsigned CurrVN; - bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const; @@ -112,8 +114,12 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, - MachineBasicBlock *MBB) { +/// The source register of a COPY machine instruction can be propagated to all +/// its users, and this propagation could increase the probability of finding +/// common subexpressions. If the COPY has only one user, the COPY itself can +/// be removed. +bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { bool Changed = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -122,10 +128,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (!MRI->hasOneNonDBGUse(Reg)) - // Only coalesce single use copies. This ensure the copy will be - // deleted. - continue; + bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; @@ -153,10 +156,14 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); + // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - DefMI->eraseFromParent(); - ++NumCoalesces; + // Coalesce single use copies. + if (OnlyOneUse) { + DefMI->eraseFromParent(); + ++NumCoalesces; + } Changed = true; } @@ -380,7 +387,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. 
- if (MI->isAsCheapAsAMove()) { + if (TII->isAsCheapAsAMove(MI)) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -444,6 +451,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; SmallVector<unsigned, 2> ImplicitDefsToUpdate; + SmallVector<unsigned, 2> ImplicitDefs; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -453,13 +461,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool FoundCSE = VNT.count(MI); if (!FoundCSE) { - // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) { + // Use trivial copy propagation to find more CSE opportunities. + if (PerformTrivialCopyPropagation(MI, MBB)) { Changed = true; // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; + + // Try again to see if CSE is possible. FoundCSE = VNT.count(MI); } } @@ -533,6 +543,12 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // we should make sure it is not dead at CSMI. if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) ImplicitDefsToUpdate.push_back(i); + + // Keep track of implicit defs of CSMI and MI, to clear possibly + // made-redundant kill flags. + if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg) + ImplicitDefs.push_back(OldReg); + if (OldReg == NewReg) { --NumDefs; continue; } @@ -573,6 +589,29 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + // Go through implicit defs of CSMI and MI, and clear the kill flags on + // their uses in all the instructions between CSMI and MI. + // We might have made some of the kill flags redundant, consider: + // subs ... %NZCV<imp-def> <- CSMI + // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore + // subs ... %NZCV<imp-def> <- MI, to be eliminated + // csinc ... %NZCV<imp-use,kill> + // Since we eliminated MI, and reused a register imp-def'd by CSMI + // (here %NZCV), that register, if it was killed before MI, should have + // that kill flag removed, because its lifetime was extended. + if (CSMI->getParent() == MI->getParent()) { + for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) + for (auto ImplicitDef : ImplicitDefs) + if (MachineOperand *MO = II->findRegisterUseOperand( + ImplicitDef, /*isKill=*/true, TRI)) + MO->setIsKill(false); + } else { + // If the instructions aren't in the same BB, bail out and clear the + // kill flag on all uses of the imp-def'd register. + for (auto ImplicitDef : ImplicitDefs) + MRI->clearKillFlags(ImplicitDef); + } + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. 
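[Editor's note: an illustrative sketch of the PerformTrivialCopyPropagation change above; it is not part of the patch and the virtual register names are hypothetical.] Given %vreg1 = COPY %vreg0 followed by %vreg2 = ADD %vreg1, %vreg1, the ADD's uses of %vreg1 are now rewritten to %vreg0 even when the COPY has several other non-debug uses; that can make the ADD value-number equal to an earlier ADD of %vreg0, so VNT.count(MI) finds a CSE candidate it previously missed. The COPY itself is erased only in the single-use case (OnlyOneUse), since otherwise it is still needed by its remaining users.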
@@ -597,6 +636,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { } CSEPairs.clear(); ImplicitDefsToUpdate.clear(); + ImplicitDefs.clear(); } return Changed; @@ -663,8 +703,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<MachineDominatorTree>(); diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp deleted file mode 100644 index 81b49784c052..000000000000 --- a/lib/CodeGen/MachineCodeEmitter.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineCodeEmitter.h" - -using namespace llvm; - -void MachineCodeEmitter::anchor() { } diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp new file mode 100644 index 000000000000..2931258172bf --- /dev/null +++ b/lib/CodeGen/MachineCombiner.cpp @@ -0,0 +1,435 @@ +//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The machine combiner pass uses machine trace metrics to ensure the combined +// instructions do not lengthen the critical path or the resource depth. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "machine-combiner" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +STATISTIC(NumInstCombined, "Number of machineinst combined"); + +namespace { +class MachineCombiner : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MCSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + + TargetSchedModel TSchedModel; + + /// OptSize - True if optimizing for code size. 
+ bool OptSize; + +public: + static char ID; + MachineCombiner() : MachineFunctionPass(ID) { + initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Machine InstCombiner"; } + +private: + bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool combineInstructions(MachineBasicBlock *); + MachineInstr *getOperandDef(const MachineOperand &MO); + unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace); + unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace); + bool + preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg); + bool preservesResourceLen(MachineBasicBlock *MBB, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs); + void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); +}; +} + +char MachineCombiner::ID = 0; +char &llvm::MachineCombinerID = MachineCombiner::ID; + +INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", + "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", + false, false) + +void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { + MachineInstr *DefInstr = nullptr; + // We need a virtual register definition. + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + DefInstr = MRI->getUniqueVRegDef(MO.getReg()); + // PHIs have no depth etc. + if (DefInstr && DefInstr->isPHI()) + DefInstr = nullptr; + return DefInstr; +} + +/// getDepth - Computes depth of instructions in vector \p InsInstrs. +/// +/// \param InsInstrs is a vector of machine instructions +/// \param InstrIdxForVirtReg is a dense map of virtual register to index +/// of defining machine instruction in \p InsInstrs +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Depth of last instruction in \p InsInstrs ("NewRoot") +unsigned +MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, +                          DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, +                          MachineTraceMetrics::Trace BlockTrace) { + + SmallVector<unsigned, 16> InstrDepth; + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // For each instruction in the new sequence compute the depth based on the + // operands. Use the trace information when possible. 
For new operands, which + // are tracked in the InstrIdxForVirtReg map, the depth is looked up in InstrDepth. + for (auto *InstrPtr : InsInstrs) { // for each Use + unsigned IDepth = 0; + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = InstrPtr->getOperand(i); + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isUse()) + continue; + unsigned DepthOp = 0; + unsigned LatencyOp = 0; + DenseMap<unsigned, unsigned>::iterator II = + InstrIdxForVirtReg.find(MO.getReg()); + if (II != InstrIdxForVirtReg.end()) { + // Operand is a new virtual register not in the trace. + assert(II->second < InstrDepth.size() && "Bad Index"); + MachineInstr *DefInstr = InsInstrs[II->second]; + assert(DefInstr && + "There must be a definition for a new virtual register"); + DepthOp = InstrDepth[II->second]; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } else { + MachineInstr *DefInstr = getOperandDef(MO); + if (DefInstr) { + DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } + } + IDepth = std::max(IDepth, DepthOp + LatencyOp); + } + InstrDepth.push_back(IDepth); + } + unsigned NewRootIdx = InsInstrs.size() - 1; + return InstrDepth[NewRootIdx]; +} + +/// getLatency - Computes instruction latency as max of latency of defined +/// operands. +/// +/// \param Root is a machine instruction that could be replaced by NewRoot. +/// It is used to compute a more accurate latency information for NewRoot in +/// case there is a dependent instruction in the same trace (\p BlockTrace) +/// \param NewRoot is the instruction for which the latency is computed +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Latency of \p NewRoot +unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, +                                     MachineTraceMetrics::Trace BlockTrace) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // Check each definition in NewRoot and compute the latency. + unsigned NewRootLatency = 0; + + for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = NewRoot->getOperand(i); + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isDef()) + continue; + // Get the first instruction that uses MO. + MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); + RI++; + MachineInstr *UseMO = RI->getParent(); + unsigned LatencyOp = 0; + if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + LatencyOp = TSchedModel.computeOperandLatency( + NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg())); + } else { + LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + } + NewRootLatency = std::max(NewRootLatency, LatencyOp); + } + return NewRootLatency; +} + +/// preservesCriticalPathLen - True when the new instruction sequence does not +/// lengthen the critical path. The DAGCombine code sequence ends in MI +/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. 
A +/// necessary condition for the new sequence to replace the old sequence is that +/// it cannot lengthen the critical path. This is decided by the formula +/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack). +/// The slack is the number of cycles Root can be delayed before the critical +/// path becomes longer. +bool MachineCombiner::preservesCriticalPathLen( + MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector + // Get depth and latency of NewRoot + unsigned NewRootIdx = InsInstrs.size() - 1; + MachineInstr *NewRoot = InsInstrs[NewRootIdx]; + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); + + // Get depth, latency and slack of Root + unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(Root); + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth + << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency + << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency " + << NewRootDepth + NewRootLatency << "\n"; + dbgs() << " RootDepth + RootLatency + RootSlack " + << RootDepth + RootLatency + RootSlack << "\n";); + + /// True when the new sequence does not lengthen the critical path. + return ((NewRootDepth + NewRootLatency) <= + (RootDepth + RootLatency + RootSlack)); +} + +/// Helper routine to convert instructions into SC. +void MachineCombiner::instr2instrSC( + SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) { + for (auto *InstrPtr : Instrs) { + unsigned Opc = InstrPtr->getOpcode(); + unsigned Idx = TII->get(Opc).getSchedClass(); + const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); + InstrsSC.push_back(SC); + } +} +/// preservesResourceLen - True when the new instructions do not increase +/// resource length +bool MachineCombiner::preservesResourceLen( + MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs) { + + // Compute current resource length + + //ArrayRef<const MachineBasicBlock *> MBBarr(MBB); + SmallVector <const MachineBasicBlock *, 1> MBBarr; + MBBarr.push_back(MBB); + unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); + + // Deal with SC rather than Instructions. 
+ SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC; + SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC; + + instr2instrSC(InsInstrs, InsInstrsSC); + instr2instrSC(DelInstrs, DelInstrsSC); + + ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC); + ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC); + + // Compute new resource length + unsigned ResLenAfterCombine = + BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); + + DEBUG(dbgs() << "RESOURCE DATA: \n"; + dbgs() << " resource len before: " << ResLenBeforeCombine + << " after: " << ResLenAfterCombine << "\n";); + + return ResLenAfterCombine <= ResLenBeforeCombine; +} + +/// \returns true when the new instruction sequence should be generated +/// regardless of whether it lengthens the critical path or not +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) + return true; + if (!TSchedModel.hasInstrSchedModel()) + return true; + return false; +} + +/// combineInstructions - substitute a slow code sequence with a faster one by +/// evaluating instruction combining patterns. +/// The prototype of such a pattern is MUL + ADD -> MADD. Performs instruction +/// combining based on machine trace metrics. Only combine a sequence of +/// instructions when this neither lengthens the critical path nor increases +/// resource pressure. When optimizing for code size always combine when the new +/// sequence is shorter. +bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { + bool Changed = false; + DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + + auto BlockIter = MBB->begin(); + + while (BlockIter != MBB->end()) { + auto &MI = *BlockIter++; + + DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); + SmallVector<MachineCombinerPattern::MC_PATTERN, 16> Pattern; + // The motivating example is: + // + // MUL Other MUL_op1 MUL_op2 Other + // \ / \ | / + // ADD/SUB => MADD/MSUB + // (=Root) (=NewRoot) + + // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is + // usually beneficial for code size, it unfortunately can hurt performance + // when the ADD is on the critical path, but the MUL is not. With the + // substitution the MUL becomes part of the critical path (in form of the + // MADD) and can lengthen it on architectures where the MADD latency is + // longer than the ADD latency. + // + // For each instruction we check if it can be the root of a combiner + // pattern. Then for each pattern the new code sequence in form of MI is + // generated and evaluated. When the efficiency criteria (don't lengthen + // critical path, don't use more resources) are met the new sequence gets + // hooked up into the basic block before the old sequence is removed. + // + // The algorithm does not try to evaluate all patterns and pick the best. + // This is only an artificial restriction though. In practice there is + // mostly one pattern and hasPattern() can order patterns based on an + // internal cost heuristic. 
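// [Editor's note: a worked instance of the preservesCriticalPathLen check above, with hypothetical latencies; it is not part of the patch.] Suppose MUL has latency 4, ADD latency 1, and the combined MADD latency 5. If every MADD operand is ready at cycle 0, then NewRootDepth + NewRootLatency = 0 + 5 = 5 while RootDepth + RootLatency + RootSlack = 4 + 1 + 0 = 5, so the substitution is accepted (5 <= 5). If the addend only becomes available at cycle 2, the left side becomes 2 + 5 = 7 > 5 and the pattern is rejected, because the MADD would lengthen the critical path.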
+ + if (TII->hasPattern(MI, Pattern)) { + for (auto P : Pattern) { + SmallVector<MachineInstr *, 16> InsInstrs; + SmallVector<MachineInstr *, 16> DelInstrs; + DenseMap<unsigned, unsigned> InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!InsInstrs.size()) + continue; + // Substitute when we optimize for code size and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || + (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) & MI, + (MachineInstr *)InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. + for (auto *InstrPtr : InsInstrs) { + MachineFunction *MF = MBB->getParent(); + MF->DeleteMachineInstr((MachineInstr *)InstrPtr); + } + } + InstrIdxForVirtReg.clear(); + } + } + } + + return Changed; +} + +bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = + MF.getTarget().getSubtarget<TargetSubtargetInfo>(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); + TSchedModel.init(SchedModel, &STI, TII); + MRI = &MF.getRegInfo(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; + + OptSize = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + + DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + if (!TII->useMachineCombiner()) { + DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + return false; + } + + bool Changed = false; + + // Try to combine instructions. 
+ for (auto &MBB : MF) + Changed |= combineInstructions(&MBB); + + return Changed; +} diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 3119a35c2e9f..cbd62728ace9 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "codegen-cp" @@ -335,8 +336,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp index 0bee84668f01..acb7c4810b16 100644 --- a/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 04c8ecbf9bdc..df60cf34b610 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -35,6 +35,8 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + CriticalEdgesToSplit.clear(); + NewBBs.clear(); DT->recalculate(F); return false; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 7e9b7559517d..6b4cba6c6782 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "codegen" @@ -52,17 +53,19 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { } MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &mmi, - GCModuleInfo* gmi) - : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { - if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(TM); + unsigned FunctionNum, MachineModuleInfo &mmi) + : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + MMI(mmi) { + if (STI->getRegisterInfo()) + RegInfo = new (Allocator) MachineRegisterInfo(this); else RegInfo = nullptr; MFInfo = nullptr; - FrameInfo = - new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + FrameInfo = new (Allocator) + MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(), + STI->getFrameLowering()->isStackRealignable(), + !F->hasFnAttribute("no-realign-stack")); if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) @@ -70,13 +73,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, getStackAlignment(AttributeSet::FunctionIndex)); ConstantPool = 
new (Allocator) MachineConstantPool(TM); - Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, - TM.getTargetLowering()->getPrefFunctionAlignment()); + STI->getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; JumpTableInfo = nullptr; @@ -229,10 +232,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); } MachineMemOperand * @@ -243,12 +246,12 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), nullptr); + MMO->getBaseAlignment()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), nullptr); + MMO->getBaseAlignment()); } MachineInstr::mmo_iterator @@ -279,7 +282,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getTBAAInfo()); + (*I)->getAAInfo()); Result[Index] = JustLoad; } ++Index; @@ -311,7 +314,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getTBAAInfo()); + (*I)->getAAInfo()); Result[Index] = JustStore; } ++Index; @@ -350,7 +353,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { // Print Constant Pool ConstantPool->print(OS); - const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo(); if (RegInfo && !RegInfo->livein_empty()) { OS << "Function Live Ins: "; @@ -459,7 +462,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout *DL = getSubtarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); @@ -474,7 +477,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. 
MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout *DL = getSubtarget().getDataLayout(); return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -483,15 +486,11 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// -const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { - return TM.getFrameLowering(); -} - /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!getFrameLowering()->isStackRealignable() || !RealignOption) - assert(Align <= getFrameLowering()->getStackAlignment() && + if (!StackRealignable || !RealignOption) + assert(Align <= StackAlignment && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -513,11 +512,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca)); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -530,9 +528,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment( - !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, - getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -547,10 +544,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = clampStackAlignment( - !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, - getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } @@ -561,20 +557,18 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { + bool Immutable, bool isAliased) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); // The alignment of the frame index can be determined from its offset from // the incoming frame position. 
If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. - unsigned StackAlign = getFrameLowering()->getStackAlignment(); - unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*Alloca*/ nullptr)); + /*Alloca*/ nullptr, isAliased)); return -++NumFixedObjects; } @@ -582,25 +576,32 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, /// on the stack. Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset) { - unsigned StackAlign = getFrameLowering()->getStackAlignment(); - unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, /*Immutable*/ true, /*isSS*/ true, - /*Alloca*/ nullptr)); + /*Alloca*/ nullptr, + /*isAliased*/ false)); return -++NumFixedObjects; } +int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) { + // Force the use of a frame pointer. The intention is that this intrinsic be + // used in conjunction with unwind mechanisms that leak the frame pointer. + setFrameAddressIsTaken(true); + Size = RoundUpToAlignment(Size, StackAlignment); + return CreateStackObject(Size, StackAlignment, false); +} + BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { assert(MBB && "MBB must be valid"); const MachineFunction *MF = MBB->getParent(); assert(MF && "MBB must be part of a MachineFunction"); const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. They can be @@ -625,8 +626,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { } unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); unsigned MaxAlign = getMaxAlignment(); int Offset = 0; @@ -676,7 +677,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; - const TargetFrameLowering *FI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); int ValOffset = (FI ? 
FI->getOffsetOfLocalArea() : 0); OS << "Frame Objects:\n"; @@ -820,7 +821,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getDataLayout(); + return TM.getSubtargetImpl()->getDataLayout(); } Type *MachineConstantPoolEntry::getType() const { diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 46cd60ac613e..f6f34ba9d927 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -46,8 +46,7 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis<MachineModuleInfo>(), - getAnalysisIfAvailable<GCModuleInfo>()); + getAnalysis<MachineModuleInfo>()); return false; } diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index dee3977cd080..790f5accdb26 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -52,7 +52,7 @@ char MachineFunctionPrinterPass::ID = 0; } char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; -INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs", +INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer", "Machine Function Printer", false, false) namespace llvm { diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 5122165e3659..968ec2c68e8c 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -39,6 +39,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -105,23 +106,41 @@ void MachineOperand::setIsDef(bool Val) { IsDef = Val; } +// If this operand is currently a register operand, and if this is in a +// function, deregister the operand from the register's use/def list. +void MachineOperand::removeRegFromUses() { + if (!isReg() || !isOnRegUseList()) + return; + + if (MachineInstr *MI = getParent()) { + if (MachineBasicBlock *MBB = MI->getParent()) { + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); + } + } +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - // If this operand is currently a register operand, and if this is in a - // function, deregister the operand from the register's use/def list. 
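// The hunk just below hoists a four-deep if-ladder into the guarded
// removeRegFromUses helper. The same guard-clause shape, reduced to a
// standalone sketch (toy types, not the LLVM classes):

#include <cassert>

struct Func  { int RemovedCount; };
struct Block { Func *Parent; };
struct Instr { Block *Parent; };

struct Operand {
  Instr *Parent;
  bool IsReg;

  // Early returns replace the nested ifs: bail out at the first missing
  // link, otherwise reach the owning function in a straight line.
  void removeRegFromUses() {
    if (!IsReg || !Parent || !Parent->Parent || !Parent->Parent->Parent)
      return;
    ++Parent->Parent->Parent->RemovedCount;
  }
};

int main() {
  Func F = {0};
  Block B = {&F};
  Instr I = {&B};
  Operand Op = {&I, true};    // fully parented register operand
  Op.removeRegFromUses();
  Operand Orphan = {nullptr, true};
  Orphan.removeRegFromUses(); // orphaned operand: no-op via the guards
  assert(F.RemovedCount == 1);
}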
- if (isReg() && isOnRegUseList()) - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - MF->getRegInfo().removeRegOperandFromUseList(this); + + removeRegFromUses(); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; } +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_FPImmediate; + Contents.CFP = FPImm; +} + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be a register already, /// the setReg method should be used. @@ -265,7 +284,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr; + const TargetRegisterInfo *TRI = + TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; switch (getType()) { case MachineOperand::MO_Register: @@ -377,7 +397,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { break; case MachineOperand::MO_Metadata: OS << '<'; - getMetadata()->printAsOperand(OS, /*PrintType=*/false); + getMetadata()->printAsOperand(OS); OS << '>'; break; case MachineOperand::MO_MCSymbol: @@ -429,11 +449,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo), Ranges(Ranges) { + AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && "invalid pointer value"); @@ -514,10 +534,38 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << "(align=" << MMO.getAlignment() << ")"; // Print TBAA info. - if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { + if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) { OS << "(tbaa="; if (TBAAInfo->getNumOperands() > 0) - TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); + TBAAInfo->getOperand(0)->printAsOperand(OS); else OS << "<unknown>"; OS << ")"; } + + // Print AA scope info. + if (const MDNode *ScopeInfo = MMO.getAAInfo().Scope) { + OS << "(alias.scope="; + if (ScopeInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { + ScopeInfo->getOperand(i)->printAsOperand(OS); + if (i != ie-1) + OS << ","; + } + else + OS << "<unknown>"; + OS << ")"; + } + + // Print AA noalias scope info.
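// The MachineMemOperand constructor above packs the alignment into the
// high bits of Flags as log2(align) + 1. A self-contained model of that
// encoding (the MOMaxBits width is an assumed value, not the real one;
// the noalias scope printing continues below):

#include <cassert>
#include <cstdint>

static const unsigned MOMaxBits = 5; // assumed width of the flag field

static uint32_t packAlignIntoFlags(uint32_t Flags, uint32_t Align) {
  uint32_t Log2 = 0;
  while ((1u << (Log2 + 1)) <= Align) // integer log2 of a power of two
    ++Log2;
  return (Flags & ((1u << MOMaxBits) - 1)) | ((Log2 + 1) << MOMaxBits);
}

static uint32_t alignFromFlags(uint32_t Packed) {
  // Decoding (1 << (log2 + 1)) >> 1 undoes the +1 bias in the encoding.
  return (1u << (Packed >> MOMaxBits)) >> 1;
}

int main() {
  uint32_t P = packAlignIntoFlags(/*Flags=*/0x3, /*Align=*/16);
  assert(alignFromFlags(P) == 16);
  assert((P & ((1u << MOMaxBits) - 1)) == 0x3); // flag bits are intact
}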
+ if (const MDNode *NoAliasInfo = MMO.getAAInfo().NoAlias) { + OS << "(noalias="; + if (NoAliasInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { + NoAliasInfo->getOperand(i)->printAsOperand(OS); + if (i != ie-1) + OS << ","; + } else OS << "<unknown>"; OS << ")"; @@ -551,6 +599,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + // Reserve space for the expected number of operands. if (unsigned NumOps = MCID->getNumOperands() + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { @@ -569,6 +619,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + CapOperands = OperandCapacity::get(MI.getNumOperands()); Operands = MF.allocateOperandArray(CapOperands); @@ -865,6 +917,27 @@ void MachineInstr::eraseFromParent() { getParent()->erase(this); } +void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { + assert(getParent() && "Not embedded in a basic block!"); + MachineBasicBlock *MBB = getParent(); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Not embedded in a function!"); + + MachineInstr *MI = (MachineInstr *)this; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MRI.markUsesInDebugValueAsUndef(Reg); + } + MI->eraseFromParent(); +} + void MachineInstr::eraseFromBundle() { assert(getParent() && "Not embedded in a basic block!"); getParent()->erase_instr(this); @@ -1379,7 +1452,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), - (*I)->getTBAAInfo()))) + (*I)->getAAInfo()))) continue; } @@ -1489,8 +1562,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getInstrInfo()) - OS << TM->getInstrInfo()->getName(getOpcode()); + if (TM && TM->getSubtargetImpl()->getInstrInfo()) + OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1538,17 +1611,17 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // call instructions much less noisy on targets where calls clobber lots // of registers. Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. 
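// eraseFromParentAndMarkDBGValuesForRemoval above neutralizes DBG_VALUE
// users before their defining instruction disappears. A toy model of that
// ordering (invented types; only the sequencing mirrors the real code;
// the call-operand printing described above continues below):

#include <cassert>
#include <vector>

struct DbgValue { unsigned Reg; bool Undef; };

// Stand-in for MRI.markUsesInDebugValueAsUndef(Reg).
static void markUndef(std::vector<DbgValue> &DVs, unsigned Reg) {
  for (DbgValue &DV : DVs)
    if (DV.Reg == Reg)
      DV.Undef = true;
}

int main() {
  std::vector<DbgValue> DVs = {{1, false}, {2, false}};
  const std::vector<unsigned> Defs = {1}; // vreg defs of the dying instr
  for (unsigned Reg : Defs) // step 1: mark debug users undef...
    markUndef(DVs, Reg);
  // step 2: ...only then erase the instruction itself
  assert(DVs[0].Undef && !DVs[1].Undef);
}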
- if (MF && isCall() && + if (MRI && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - const MachineRegisterInfo &MRI = MF->getRegInfo(); - if (MRI.use_empty(Reg)) { + if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); + for (MCRegAliasIterator AI( + Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; - if (!MRI.use_empty(AliasReg)) { + if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; break; } @@ -1573,12 +1646,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. const MDNode *MD = MO.getMetadata(); - if (const MDString *MDS = dyn_cast<MDString>(MD->getOperand(2))) - OS << "!\"" << MDS->getString() << '\"'; + DIDescriptor DI(MD); + DIVariable DIV(MD); + + if (DI.isVariable() && !DIV.getName().empty()) + OS << "!\"" << DIV.getName() << '\"'; else MO.print(OS, TM); } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( + MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1595,9 +1672,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) - OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName(); - else + if (TM) { + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl()->getRegisterInfo(); + OS << ':' + << TRI->getRegClassName(TRI->getRegClass(RCID)); + } else OS << ":RC" << RCID; } @@ -1646,7 +1726,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); + OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC) + << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { ++j; @@ -1672,6 +1753,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " ]"; } } + if (isIndirectDebugValue()) + OS << " indirect"; } else if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; @@ -1881,7 +1964,8 @@ void MachineInstr::emitError(StringRef Msg) const { if (getOperand(i-1).isMetadata() && (LocMD = getOperand(i-1).getMetadata()) && LocMD->getNumOperands() != 0) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { LocCookie = CI->getZExtValue(); break; } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 962169e1b15c..0690f08d495b 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -16,6 +16,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { @@ -103,12 +104,12 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, assert(FirstMI != LastMI && "Empty bundle?"); 
MIBundleBuilder Bundle(MBB, FirstMI, LastMI); - const TargetMachine &TM = MBB.getParent()->getTarget(); - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(), - TII->get(TargetOpcode::BUNDLE)); + MachineInstrBuilder MIB = + BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); Bundle.prepend(MIB); SmallVector<unsigned, 32> LocalDefs; @@ -140,7 +141,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, // Internal def is now killed. KilledDefSet.insert(Reg); } else { - if (ExternUseSet.insert(Reg)) { + if (ExternUseSet.insert(Reg).second) { ExternUses.push_back(Reg); if (MO.isUndef()) UndefUseSet.insert(Reg); @@ -157,7 +158,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, if (!Reg) continue; - if (LocalDefSet.insert(Reg)) { + if (LocalDefSet.insert(Reg).second) { LocalDefs.push_back(Reg); if (MO.isDead()) { DeadDefSet.insert(Reg); @@ -173,7 +174,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, if (!MO.isDead()) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; - if (LocalDefSet.insert(SubReg)) + if (LocalDefSet.insert(SubReg).second) LocalDefs.push_back(SubReg); } } @@ -185,7 +186,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet<unsigned, 32> Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; - if (Added.insert(Reg)) { + if (Added.insert(Reg).second) { // If it's not live beyond end of the bundle, mark it dead. bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 68d2efdb1d0d..cb14a5cab4b4 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -39,6 +39,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-licm" @@ -48,6 +49,11 @@ AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden); +static cl::opt<bool> +HoistCheapInsts("hoist-cheap-insts", + cl::desc("MachineLICM should hoist even cheap instructions"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -61,7 +67,6 @@ STATISTIC(NumPostRAHoisted, namespace { class MachineLICM : public MachineFunctionPass { - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; @@ -142,9 +147,6 @@ namespace { RegPressure.clear(); RegLimit.clear(); BackTrace.clear(); - for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator - CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) - CI->second.clear(); CSEMap.clear(); } @@ -324,13 +326,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return false; Changed = FirstInLoop = false; - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - TLI = TM->getTargetLowering(); - TRI = TM->getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TLI = MF.getSubtarget().getTargetLowering(); + TRI = 
MF.getSubtarget().getRegisterInfo(); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); - InstrItins = TM->getInstrItineraryData(); + InstrItins = MF.getSubtarget().getInstrItineraryData(); PreRegAlloc = MRI->isSSA(); @@ -822,7 +823,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg); + bool isNew = RegSeen.insert(Reg).second; unsigned RCId, RCCost; getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); if (MO.isDef()) @@ -854,7 +855,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg); + bool isNew = RegSeen.insert(Reg).second; if (MO.isDef()) Defs.push_back(Reg); else if (!isNew && isOperandKill(MO, MRI)) { @@ -1039,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, /// IsCheapInstruction - Return true if the instruction is marked "cheap" or /// the operand latency between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { - if (MI.isAsCheapAsAMove() || MI.isCopyLike()) + if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) return true; if (!InstrItins || InstrItins->isEmpty()) return false; @@ -1079,7 +1080,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, // Don't hoist cheap instructions if they would increase register pressure, // even if we're under the limit. - if (CheapInstr) + if (CheapInstr && !HoistCheapInsts) return true; for (unsigned i = BackTrace.size(); i != 0; --i) { @@ -1299,15 +1300,7 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; unsigned Opcode = MI->getOpcode(); - DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator - CI = CSEMap.find(Opcode); - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else { - std::vector<const MachineInstr*> CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + CSEMap[Opcode].push_back(MI); } } @@ -1447,11 +1440,8 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Add to the CSE map. if (CI != CSEMap.end()) CI->second.push_back(MI); - else { - std::vector<const MachineInstr*> CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + else + CSEMap[Opcode].push_back(MI); } ++NumHoisted; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 4976e35dba82..32d511285eb2 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -270,11 +270,10 @@ MachineModuleInfo::~MachineModuleInfo() { bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; - CompactUnwindEncoding = 0; CurCallSite = 0; CallsEHReturn = 0; CallsUnwindInit = 0; - DbgInfoAvailable = UsesVAFloatArgument = false; + DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; // Always emit some info, by default "no personality" info. 
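// The MachineLICM hunks above collapse a find-or-insert dance into
// CSEMap[Opcode].push_back(MI): map operator[] value-initializes the
// vector on first touch. The same idiom with the standard library
// (the personality setup resumes below):

#include <cassert>
#include <map>
#include <vector>

int main() {
  std::map<unsigned, std::vector<int>> CSEMap;
  // The first touch of key 42 default-constructs an empty vector, so both
  // branches of the old "find, else insert" code become one statement.
  CSEMap[42].push_back(7);
  CSEMap[42].push_back(8);
  assert(CSEMap.size() == 1 && CSEMap[42].size() == 2);
}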
Personalities.push_back(nullptr); AddrLabelSymbols = nullptr; @@ -312,7 +311,6 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; - CompactUnwindEncoding = 0; VariableDbgInfos.clear(); } @@ -429,7 +427,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// void MachineModuleInfo:: addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalVariable *> TyInfo) { + ArrayRef<const GlobalValue *> TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -439,7 +437,7 @@ addCatchTypeInfo(MachineBasicBlock *LandingPad, /// void MachineModuleInfo:: addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef<const GlobalVariable *> TyInfo) { + ArrayRef<const GlobalValue *> TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector<unsigned> IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -454,6 +452,14 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } +MCSymbol * +MachineModuleInfo::addClauseForLandingPad(MachineBasicBlock *LandingPad) { + MCSymbol *ClauseLabel = Context.CreateTempSymbol(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.ClauseLabels.push_back(ClauseLabel); + return ClauseLabel; +} + /// TidyLandingPads - Remap landing pad labels and remove any deleted landing /// pads. void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { @@ -508,7 +514,7 @@ void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. -unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { +unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) if (TypeInfos[i] == TI) return i + 1; diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index c6b6802b0eb1..01d2c2eb56fe 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -1,8 +1,10 @@ #include "llvm/CodeGen/MachineRegionInfo.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" +#include "llvm/CodeGen/MachinePostDominators.h" + +#define DEBUG_TYPE "region" using namespace llvm; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f560259a98d9..86bb34be3fe8 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -16,28 +16,23 @@ #include "llvm/Support/raw_os_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; // Pin the vtable to this file. void MachineRegisterInfo::Delegate::anchor() {} -MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) - : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) { +MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) + : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), + TracksSubRegLiveness(false) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. 
- PhysRegUseDefLists = - new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; - memset(PhysRegUseDefLists, 0, - sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); -} - -MachineRegisterInfo::~MachineRegisterInfo() { - delete [] PhysRegUseDefLists; + PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr); } /// setRegClass - Set the register class of the specified virtual register. @@ -67,7 +62,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { - const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); @@ -134,6 +129,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; + continue; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); @@ -283,18 +279,25 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst, /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. +/// If ToReg is a physical register we apply the sub register to obtain the +/// final/proper physical register. void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { assert(FromReg != ToReg && "Cannot replace a reg with itself"); + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + // TODO: This could be more efficient by bulk changing the operands. for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; ++I; - O.setReg(ToReg); + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + O.substPhysReg(ToReg, *TRI); + } else { + O.setReg(ToReg); + } } } - /// getVRegDef - Return the machine instr that defines the specified virtual /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. @@ -390,6 +393,14 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } +unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const +{ + // Lane masks are only defined for vregs. 
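// The MachineRegisterInfo hunks above drop a manual new[]/memset pair and
// the matching delete[] destructor in favor of a single resize. A minimal
// illustration of why the container version needs no cleanup code
// (the lane-mask helper continues below):

#include <cassert>
#include <vector>

struct OperandStub; // stand-in for MachineOperand

int main() {
  const unsigned NumRegs = 128;
  std::vector<OperandStub *> UseDefLists;
  // One call zero-fills every slot, and the vector frees itself on scope
  // exit: new[], memset, and delete[] all disappear.
  UseDefLists.resize(NumRegs, nullptr);
  assert(UseDefLists.size() == NumRegs && UseDefLists[0] == nullptr);
}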
+ assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const TargetRegisterClass &TRC = *getRegClass(Reg); + return TRC.getLaneMask(); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (MachineInstr &I : use_instructions(Reg)) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index d9173a27dc41..71a6ebaba243 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -24,8 +24,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; @@ -39,7 +39,7 @@ static AvailableValsTy &getAvailableVals(void *AV) { MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) : AV(nullptr), InsertedPHIs(NewPHI) { - TII = MF.getTarget().getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 44191f785386..9fe23c5b2272 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -40,6 +40,9 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, cl::desc("Force bottom-up list scheduling")); +cl::opt<bool> +DumpCriticalPathLength("misched-dcpl", cl::Hidden, + cl::desc("Print critical path length to stdout")); } #ifndef NDEBUG @@ -141,12 +144,12 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", +INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "misched", +INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -378,7 +381,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI, /// Main driver for both MachineScheduler and PostMachineScheduler. void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. @@ -427,9 +430,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // instruction stream until we find the nearest boundary. unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { + for(;I != MBB->begin(); --I, --RemainingInstrs) { if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) break; + if (!I->isDebugValue()) + ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. 
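// The scheduleRegions change above stops counting DBG_VALUEs toward
// NumRegionInstrs, so builds with and without debug info carve identical
// scheduling regions. A toy version of the counting rule:

#include <cassert>
#include <vector>

struct Instr { bool IsDebugValue; };

static unsigned countRegionInstrs(const std::vector<Instr> &Region) {
  unsigned N = 0;
  for (const Instr &I : Region)
    if (!I.IsDebugValue) // debug values must not change region size
      ++N;
  return N;
}

int main() {
  const std::vector<Instr> Region = {{false}, {true}, {false}, {true}};
  assert(countRegionInstrs(Region) == 2);
}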
@@ -451,6 +456,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << " Remaining: " << RemainingInstrs << "\n"); + if (DumpCriticalPathLength) { + errs() << MF->getName(); + errs() << ":BB# " << MBB->getNumber(); + errs() << " " << MBB->getName() << " \n"; + } // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -2355,14 +2365,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); if (!Top.HazardRec) { Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } if (!Bot.HazardRec) { Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } } @@ -2370,8 +2381,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - const TargetMachine &TM = Context->MF->getTarget(); - const TargetLowering *TLI = TM.getTargetLowering(); + const MachineFunction &MF = *Begin->getParent()->getParent(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Avoid setting up the register pressure tracker for small regions to save // compile time. As a rough heuristic, only track pressure when the number of @@ -2391,8 +2402,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.OnlyBottomUp = true; // Allow the subtarget to override default policy. - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); - ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End, + NumRegionInstrs); // After subtarget overrides, apply command line options. if (!EnableRegPressure) @@ -2460,7 +2471,10 @@ void GenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; + } if (EnableCyclicPath) { Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); @@ -2482,8 +2496,8 @@ static bool tryPressure(const PressureChange &TryP, } // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. - if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { + if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { return true; } // If the candidates are decreasing pressure, reverse priority. @@ -2885,10 +2899,10 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, // or are disabled, then these HazardRecs will be disabled. 
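// DumpCriticalPathLength above is the standard cl::opt recipe: a hidden
// boolean switch read like a plain bool at the use site. A minimal
// standalone sketch of the same recipe (assumes an LLVM tree of this
// vintage to build against; the flag name here is invented; the hazard
// recognizer setup resumes below):

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

static llvm::cl::opt<bool>
DumpStats("dump-stats", llvm::cl::Hidden,
          llvm::cl::desc("Print statistics while running"),
          llvm::cl::init(false));

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  if (DumpStats) // reads exactly like a plain bool
    llvm::errs() << "stats enabled\n";
  return 0;
}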
const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); if (!Top.HazardRec) { Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } } @@ -2902,7 +2916,10 @@ void PostGenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; + } } /// Apply a set of heuristics to a new candidate for PostRA scheduling. diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index f44e4d1eaabd..8337793d960d 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -17,18 +17,21 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-sink" @@ -38,6 +41,12 @@ SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); +static cl::opt<bool> +UseBlockFreqInfo("machine-sink-bfi", + cl::desc("Use block frequency info to find successors to sink"), + cl::init(true), cl::Hidden); + + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); @@ -46,14 +55,20 @@ namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; // Machine register information - MachineDominatorTree *DT; // Machine dominator tree + MachineRegisterInfo *MRI; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree + MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; + const MachineBlockFrequencyInfo *MBFI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8> CEBCandidates; + // Remember which edges we are about to split. + // This is different from CEBCandidates since those edges + // will be split.
+ SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit; public: static char ID; // Pass identification @@ -68,9 +83,13 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachinePostDominatorTree>(); AU.addPreserved<MachineLoopInfo>(); + if (UseBlockFreqInfo) + AU.addRequired<MachineBlockFrequencyInfo>(); } void releaseMemory() override { @@ -82,10 +101,22 @@ namespace { bool isWorthBreakingCriticalEdge(MachineInstr *MI, MachineBasicBlock *From, MachineBasicBlock *To); - MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI, - MachineBasicBlock *From, - MachineBasicBlock *To, - bool BreakPHIEdge); + /// \brief Postpone the splitting of the given critical + /// edge (\p From, \p To). + /// + /// We do not split the edges on the fly. Indeed, this invalidates + /// the dominance information and thus triggers a lot of updates + /// of that information underneath. + /// Instead, we postpone all the splits after each iteration of + /// the main loop. That way, the information is at least valid + /// for the lifetime of an iteration. + /// + /// \return True if the edge is marked as toSplit, false otherwise. + /// False can be returned if, for instance, this is not profitable. + bool PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); bool SinkInstruction(MachineInstr *MI, bool &SawStore); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, @@ -135,6 +166,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, DEBUG(dbgs() << "*** to: " << *MI); MRI->replaceRegWith(DstReg, SrcReg); MI->eraseFromParent(); + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + MRI->clearKillFlags(SrcReg); + ++NumCoalesces; return true; } @@ -213,12 +249,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Machine Sinking ********\n"); - const TargetMachine &TM = MF.getTarget(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); + PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); + MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; AA = &getAnalysis<AliasAnalysis>(); bool EverMadeChange = false; @@ -228,10 +265,24 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. CEBCandidates.clear(); + ToSplit.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); + // If we have anything we marked as toSplit, split it now. + for (auto &Pair : ToSplit) { + auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this); + if (NewSucc != nullptr) { + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << Pair.first->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << Pair.second->getNumber() << '\n'); + MadeChange = true; + ++NumSplit; + } else + DEBUG(dbgs() << " *** Not legal to break critical edge\n"); + } // If this iteration over the code changed anything, keep iterating. 
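// PostponeSplitCriticalEdge above only records the edge; the actual CFG
// surgery runs after the whole iteration, so dominance info stays valid
// while the pass is still querying it. A standalone model of the
// collect-then-apply pattern (toy edge type; the loop exit test follows):

#include <cassert>
#include <set>
#include <utility>
#include <vector>

typedef std::pair<int, int> Edge; // (From, To) block numbers

int main() {
  // Phase 1: analysis records candidates without mutating the CFG, so
  // every dominator-tree query made meanwhile remains trustworthy.
  std::set<Edge> ToSplit;
  ToSplit.insert(Edge(1, 3));
  ToSplit.insert(Edge(1, 3)); // duplicates collapse, as with SetVector
  ToSplit.insert(Edge(2, 3));

  // Phase 2: apply all recorded splits at once, after the iteration.
  std::vector<Edge> Applied(ToSplit.begin(), ToSplit.end());
  assert(Applied.size() == 2);
}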
if (!MadeChange) break; EverMadeChange = true; } @@ -289,10 +340,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, // If the pass has already considered breaking this edge (during this pass // through the function), then let's go ahead and break it. This means // sinking multiple "cheap" instructions into the same block. - if (!CEBCandidates.insert(std::make_pair(From, To))) + if (!CEBCandidates.insert(std::make_pair(From, To)).second) return true; - if (!MI->isCopy() && !MI->isAsCheapAsAMove()) + if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI)) return true; // MI is cheap, we probably don't want to break the critical edge for it. @@ -328,21 +379,21 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, return false; } -MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, - MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB, - bool BreakPHIEdge) { +bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return nullptr; + return false; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) - return nullptr; + return false; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) - return nullptr; + return false; // It's not always legal to break critical edges and sink the computation // to the edge. @@ -389,11 +440,13 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, if (*PI == FromBB) continue; if (!DT->dominates(ToBB, *PI)) - return nullptr; + return false; } } - return FromBB->SplitCriticalEdge(ToBB, this); + ToSplit.insert(std::make_pair(FromBB, ToBB)); + + return true; } static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { @@ -419,23 +472,6 @@ static void collectDebugValues(MachineInstr *MI, } } -/// isPostDominatedBy - Return true if A is post dominated by B. -static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { - - // FIXME - Use real post dominator. - if (A->succ_size() != 2) - return false; - MachineBasicBlock::succ_iterator I = A->succ_begin(); - if (B == *I) - ++I; - MachineBasicBlock *OtherSuccBlock = *I; - if (OtherSuccBlock->succ_size() != 1 || - *(OtherSuccBlock->succ_begin()) != B) - return false; - - return true; -} - /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, MachineBasicBlock *MBB, @@ -447,8 +483,13 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, return false; // It is profitable if SuccToSinkTo does not post dominate current block. - if (!isPostDominatedBy(MBB, SuccToSinkTo)) - return true; + if (!PDT->dominates(SuccToSinkTo, MBB)) + return true; + + // It is profitable to sink an instruction from a deeper loop to a shallower + // loop, even if the latter post-dominates the former (PR21115). + if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + return true; // Check if only use in post dominated block is PHI instruction. bool NonPHIUse = false; @@ -463,7 +504,7 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, // If SuccToSinkTo post dominates then also it may be profitable if MI // can be further profitably sunk into another block in the next round.
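// The new PR21115 rule above in isolation: sinking out of a deeper loop
// pays off even when the target post-dominates the source. A toy decision
// function over plain loop depths (the surrounding hunk continues below):

#include <cassert>

static bool profitableByLoopDepth(unsigned FromDepth, unsigned ToDepth) {
  // Depth 2 -> depth 1 executes the instruction fewer times; equal or
  // greater target depth gains nothing by this rule alone.
  return FromDepth > ToDepth;
}

int main() {
  assert(profitableByLoopDepth(2, 1));  // inner loop -> outer loop
  assert(!profitableByLoopDepth(1, 1)); // same depth: no gain here
  assert(!profitableByLoopDepth(0, 2)); // never favors a deeper target
}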
bool BreakPHIEdge = false; - // FIXME - If finding successor is compile time expensive then catch results. + // FIXME - If finding successor is compile time expensive then cache results. if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2); @@ -512,19 +553,6 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return nullptr; - // FIXME: This picks a successor to sink into based on having one - // successor that dominates all the uses. However, there are cases where - // sinking can happen but where the sink point isn't a successor. For - // example: - // - // x = computation - // if () {} else {} - // use x - // - // the instruction could be sunk over the whole diamond for the - // if/then/else (or loop, etc), allowing it to be sunk into other blocks - // after that. - // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { @@ -539,14 +567,37 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, } // Otherwise, we should look at all the successors and decide which one - // we should sink to. - // We give successors with smaller loop depth higher priority. - SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); - // Sort Successors according to their loop depth. + // we should sink to. If we have reliable block frequency information + // (frequency != 0) available, give successors with smaller frequencies + // higher priority, otherwise prioritize smaller loop depths. + SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), + MBB->succ_end()); + + // Handle cases where sinking can happen but where the sink point isn't a + // successor. For example: + // + // x = computation + // if () {} else {} + // use x + // + const std::vector<MachineDomTreeNode *> &Children = + DT->getNode(MBB)->getChildren(); + for (const auto &DTChild : Children) + // DomTree children of MBB that have MBB as immediate dominator are added. + if (DTChild->getIDom()->getBlock() == MI->getParent() && + // Skip MBBs already added to the Succs vector above. + !MBB->isSuccessor(DTChild->getBlock())) + Succs.push_back(DTChild->getBlock()); + + // Sort Successors according to their loop depth or block frequency info. std::stable_sort( Succs.begin(), Succs.end(), - [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) { - return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { + uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; + uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; + bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; + return HasBlockFreq ? LHSFreq < RHSFreq + : LI->getLoopDepth(L) < LI->getLoopDepth(R); }); for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(), E = Succs.end(); SI != E; ++SI) { @@ -655,21 +706,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { - MachineBasicBlock *NewSucc = - SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); - if (!NewSucc) { + // Mark this edge as to be split. + // If the edge can actually be split, the next iteration of the main loop + // will sink MI in the newly created block. 
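// FindSuccToSinkTo above orders candidate blocks by profile frequency when
// both counts are reliable, falling back to loop depth otherwise. The
// comparator in isolation (plain structs stand in for MachineBasicBlock;
// the split bookkeeping resumes below):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Block { uint64_t Freq; unsigned LoopDepth; int Id; };

int main() {
  std::vector<Block> Succs = {{0, 2, 0}, {0, 1, 1}, {0, 3, 2}};
  // stable_sort keeps equally-ranked successors in their original order,
  // so ties never reshuffle between runs.
  std::stable_sort(Succs.begin(), Succs.end(),
                   [](const Block &L, const Block &R) {
                     bool HasFreq = L.Freq != 0 && R.Freq != 0;
                     return HasFreq ? L.Freq < R.Freq
                                    : L.LoopDepth < R.LoopDepth;
                   });
  assert(Succs.front().Id == 1); // shallowest loop first without profile
}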
+ bool Status = + PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); + if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " - "break critical edge\n"); - return false; - } else { - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << ParentBlock->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); - SuccToSinkTo = NewSucc; - ++NumSplit; - BreakPHIEdge = false; - } + "break critical edge\n"); + // The instruction will not be sunk this time. + return false; } } @@ -677,20 +723,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. - MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, - SuccToSinkTo, BreakPHIEdge); - if (!NewSucc) { + bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); - return false; - } - - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << ParentBlock->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); - SuccToSinkTo = NewSucc; - ++NumSplit; + // The instruction will not be sunk this time. + return false; } // Determine where to insert into. Skip phi nodes. diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 1bbf0ade6a58..2cf87eb9104f 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -52,13 +52,13 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getTarget().getInstrInfo(); - TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis<MachineLoopInfo>(); const TargetSubtargetInfo &ST = MF->getTarget().getSubtarget<TargetSubtargetInfo>(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); + SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * SchedModel.getNumProcResourceKinds()); @@ -135,8 +135,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { "getResources() must be called before getProcResourceCycles()"); unsigned PRKinds = SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); - return ArrayRef<unsigned>(ProcResourceCycles.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); } @@ -256,8 +255,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceDepths(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); - return ArrayRef<unsigned>(ProcResourceDepths.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); } /// Get an array of processor resource heights for MBB. 
Indexed by processor @@ -270,8 +268,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceHeights(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); - return ArrayRef<unsigned>(ProcResourceHeights.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); } //===----------------------------------------------------------------------===// @@ -452,7 +449,7 @@ public: } // To is a new block. Mark the block as visited in case the CFG has cycles // that MachineLoopInfo didn't recognize as a natural loop. - return LB.Visited.insert(To); + return LB.Visited.insert(To).second; } }; } @@ -1169,6 +1166,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { return DepCycle; } +/// When bottom is set include instructions in current block in estimate. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Find the limiting processor resource. // Numbers have been pre-scaled to be comparable. @@ -1185,7 +1183,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); + /// All instructions before current block unsigned Instrs = TBI.InstrDepth; + // plus instructions in current block if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) @@ -1194,44 +1194,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { return std::max(Instrs, PRMax); } - -unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks, - ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const { +unsigned MachineTraceMetrics::Trace::getResourceLength( + ArrayRef<const MachineBasicBlock *> Extrablocks, + ArrayRef<const MCSchedClassDesc *> ExtraInstrs, + ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const { // Add up resources above and below the center block. 
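// getResourceDepth above (and getResourceLength after it) return whichever
// limit dominates: instruction count divided by issue width, or the
// busiest processor resource in cycles. The core formula, extracted
// (the per-resource summation follows below):

#include <algorithm>
#include <cassert>

static unsigned resourceDepth(unsigned Instrs, unsigned IssueWidth,
                              unsigned PRMaxCycles) {
  if (IssueWidth) // width 1 is assumed when there is no schedule model
    Instrs /= IssueWidth;
  return std::max(Instrs, PRMaxCycles);
}

int main() {
  // 12 instructions on a 4-wide machine need 3 cycles, but a resource
  // that is busy for 5 cycles is the real bottleneck.
  assert(resourceDepth(12, 4, 5) == 5);
  // With the resource free sooner, issue width limits the trace instead.
  assert(resourceDepth(12, 4, 2) == 3);
}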
ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); unsigned PRMax = 0; - for (unsigned K = 0; K != PRDepths.size(); ++K) { - unsigned PRCycles = PRDepths[K] + PRHeights[K]; - for (unsigned I = 0; I != Extrablocks.size(); ++I) - PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; - for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { - const MCSchedClassDesc* SC = ExtraInstrs[I]; + + // Capture computing cycles from extra instructions + auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs, + unsigned ResourceIdx) + ->unsigned { + unsigned Cycles = 0; + for (unsigned I = 0; I != Instrs.size(); ++I) { + const MCSchedClassDesc *SC = Instrs[I]; if (!SC->isValid()) continue; for (TargetSchedModel::ProcResIter - PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), - PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if (PI->ProcResourceIdx != K) + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + if (PI->ProcResourceIdx != ResourceIdx) continue; - PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + Cycles += + (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); } } + return Cycles; + }; + + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRCycles += extraCycles(ExtraInstrs, K); + PRCycles -= extraCycles(RemoveInstrs, K); PRMax = std::max(PRMax, PRCycles); } // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); + // Instrs: #instructions in current trace outside current block. unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + // Add instruction count from the extra blocks. for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + Instrs += ExtraInstrs.size(); + Instrs -= RemoveInstrs.size(); if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. 
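// getResourceLength above hoists two identical per-resource loops into a
// local 'extraCycles' lambda so added and removed instructions share one
// code path. The shape of that refactor, reduced to standard C++
// (the function's return statement continues below):

#include <cassert>
#include <vector>

int main() {
  const std::vector<unsigned> Extra  = {2, 3};
  const std::vector<unsigned> Remove = {1};

  // One lambda, called twice with opposite signs, replaces two copies of
  // the summation loop.
  auto cycles = [](const std::vector<unsigned> &Instrs) {
    unsigned Sum = 0;
    for (unsigned C : Instrs)
      Sum += C;
    return Sum;
  };

  unsigned PRCycles = 10;
  PRCycles += cycles(Extra);
  PRCycles -= cycles(Remove);
  assert(PRCycles == 14);
}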
return std::max(Instrs, PRMax); } +bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI, + const MachineInstr *UseMI) const { + if (DefMI->getParent() == UseMI->getParent()) + return true; + + const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()]; + const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()]; + + return DepTBI.isUsefulDominator(TBI); +} + void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 8515b0f456d8..364e8e2cb3ed 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -42,10 +42,12 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { @@ -214,9 +216,9 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR); + const LiveRange &LR, unsigned Reg, unsigned LaneMask); void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR); + const LiveRange &LR, unsigned Reg, unsigned LaneMask); void verifyInlineAsm(const MachineInstr *MI); @@ -229,20 +231,22 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, + unsigned); void verifyLiveRangeSegment(const LiveRange&, - const LiveRange::const_iterator I, unsigned); - void verifyLiveRange(const LiveRange&, unsigned); + const LiveRange::const_iterator I, unsigned, + unsigned); + void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0); void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { static char ID; // Pass ID, replacement for typeid - const char *const Banner; + const std::string Banner; - MachineVerifierPass(const char *b = nullptr) - : MachineFunctionPass(ID), Banner(b) { + MachineVerifierPass(const std::string &banner = nullptr) + : MachineFunctionPass(ID), Banner(banner) { initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); } @@ -252,7 +256,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &MF) override { - MF.verify(this, Banner); + MF.verify(this, Banner.c_str()); return false; } }; @@ -263,7 +267,7 @@ char MachineVerifierPass::ID = 0; INITIALIZE_PASS(MachineVerifierPass, "machineverifier", "Verify generated machine code", false, false) -FunctionPass *llvm::createMachineVerifierPass(const char *Banner) { +FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) { return new MachineVerifierPass(Banner); } @@ -275,11 +279,12 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = nullptr; if (OutFileName) { - std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + std::error_code EC; + OutFile = new raw_fd_ostream(OutFileName, EC, sys::fs::F_Append | sys::fs::F_Text); - 
if (!ErrorInfo.empty()) { - errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + if (EC) { + errs() << "Error opening '" << OutFileName << "': " << EC.message() + << '\n'; exit(1); } @@ -292,8 +297,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - TRI = TM->getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LiveVars = nullptr; @@ -430,15 +435,23 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR) { + const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { report(msg, MBB); - *OS << "- liverange: " << LR << "\n"; + *OS << "- liverange: " << LR << '\n'; + *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + if (LaneMask != 0) + *OS << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR) { + const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { report(msg, MF); - *OS << "- liverange: " << LR << "\n"; + *OS << "- liverange: " << LR << '\n'; + *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; + if (LaneMask != 0) + *OS << "- lanemask: " << format("%04X\n", LaneMask); } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -588,7 +601,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && Cond.empty()) { // Block unconditionally branches somewhere. - if (MBB->succ_size() != 1+LandingPadSuccs.size()) { + // If the block has exactly one successor, that happens to be a + // landingpad, accept it as valid control flow. 
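// The verifier hunk above replaces raw_fd_ostream's out-parameter error
// string with std::error_code. A sketch of the new-style open (assumes an
// LLVM tree of this vintage; the log file name is invented; the verifier's
// branch checks continue below):

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

int main() {
  std::error_code EC;
  llvm::raw_fd_ostream OS("verify.log", EC,
                          llvm::sys::fs::F_Append | llvm::sys::fs::F_Text);
  if (EC) { // EC.message() replaces the old "is the string empty?" test
    llvm::errs() << "Error opening 'verify.log': " << EC.message() << '\n';
    return 1;
  }
  OS << "ok\n";
  return 0;
}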
+ if (MBB->succ_size() != 1+LandingPadSuccs.size() && + (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 || + *MBB->succ_begin() != *LandingPadSuccs.begin())) { report("MBB exits via unconditional branch but doesn't have " "exactly one CFG successor!", MBB); } else if (!MBB->isSuccessor(TBB)) { @@ -905,7 +922,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(Reg) << " is not a " - << DRC->getName() << " register.\n"; + << TRI->getRegClassName(DRC) << " register.\n"; } } } else { @@ -916,13 +933,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { report("Invalid subregister index for virtual register", MO, MONum); - *OS << "Register class " << RC->getName() + *OS << "Register class " << TRI->getRegClassName(RC) << " does not support subreg index " << SubIdx << "\n"; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - *OS << "Register class " << RC->getName() + *OS << "Register class " << TRI->getRegClassName(RC) << " does not fully support subreg index " << SubIdx << "\n"; return; } @@ -944,8 +961,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - *OS << "Expected a " << DRC->getName() << " register, but got a " - << RC->getName() << " register\n"; + *OS << "Expected a " << TRI->getRegClassName(DRC) + << " register, but got a " << TRI->getRegClassName(RC) + << " register\n"; } } } @@ -1048,7 +1066,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) + bool Bad = !isReserved(Reg); + // We are fine if just any subregister has a defined value. 
+ if (Bad) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); + ++SubRegs) { + if (regsLive.count(*SubRegs)) { + Bad = false; + break; + } + } + } + if (Bad) report("Using an undefined physical register", MO, MONum); } else if (MRI->def_empty(Reg)) { report("Reading virtual register without a def", MO, MONum); @@ -1349,21 +1378,22 @@ void MachineVerifier::verifyLiveIntervals() { } void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, - const VNInfo *VNI, - unsigned Reg) { + const VNInfo *VNI, unsigned Reg, + unsigned LaneMask) { if (VNI->isUnused()) return; const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR); + report("Valno not live at def and not marked unused", MF, LR, Reg, + LaneMask); *OS << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR); + report("Live segment at def has different valno", MF, LR, Reg, LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; @@ -1371,7 +1401,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR); + report("Invalid definition index", MF, LR, Reg, LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LR << '\n'; return; @@ -1379,7 +1409,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LR); + report("PHIDef value is not defined at MBB start", MBB, LR, Reg, + LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } @@ -1389,7 +1420,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR); + report("No instruction at def index", MBB, LR, Reg, LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } @@ -1408,6 +1439,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, !TRI->hasRegUnit(MOI->getReg(), Reg)) continue; } + if (LaneMask != 0 && + (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0) + continue; hasDef = true; if (MOI->isEarlyClobber()) isEarlyClobber = true; @@ -1422,12 +1456,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // DEF slots. 
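// (Editorial aside, not part of the patch: a SlotIndex distinguishes four
// slots per instruction, queried with SlotIndex::isBlock(), isEarlyClobber(),
// isRegister() and isDead(). The two branches below verify that an
// early-clobber def sits at an early-clobber slot, while any other non-PHI
// def must sit at a register slot.)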
if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LR); + report("Early clobber def must be at an early-clobber slot", MBB, LR, + Reg, LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } else if (!VNI->def.isRegister()) { report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR); + MBB, LR, Reg, LaneMask); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } @@ -1435,37 +1470,38 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg) { + unsigned Reg, unsigned LaneMask) { const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR); + report("Foreign valno in live segment", MF, LR, Reg, LaneMask); *OS << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR); + report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); *OS << S << '\n'; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR); + report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); *OS << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR); + report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, + LaneMask); *OS << S << '\n'; } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LR); + report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); *OS << S << '\n'; return; } @@ -1483,14 +1519,16 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR); + report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, + LaneMask); *OS << S << '\n'; return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR); + report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, + LaneMask); *OS << S << '\n'; } @@ -1498,7 +1536,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Segment ends on the dead slot. // That means there must be a dead def. 
if (!SlotIndex::isSameInstr(S.start, S.end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LR); + report("Live segment ending at dead slot spans instructions", EndMBB, LR, + Reg, LaneMask); *OS << S << '\n'; } } @@ -1508,7 +1547,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR); + "redefined by an EC def in the same instruction", EndMBB, LR, Reg, + LaneMask); *OS << S << '\n'; } } @@ -1519,16 +1559,27 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; + bool hasSubRegDef = false; for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || MOI->getReg() != Reg) continue; + if (LaneMask != 0 && + (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) + continue; + if (MOI->isDef() && MOI->getSubReg() != 0) + hasSubRegDef = true; if (MOI->readsReg()) hasRead = true; } if (!S.end.isDead()) { if (!hasRead) { - report("Instruction ending live segment doesn't read the register", MI); - *OS << S << " in " << LR << '\n'; + // When tracking subregister liveness, the main range must start new + // values on partial register writes, even if there is no read. + if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) { + report("Instruction ending live segment doesn't read the register", + MI); + *OS << S << " in " << LR << '\n'; + } } } } @@ -1566,7 +1617,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR); + report("Register not marked live out of predecessor", *PI, LR, Reg, + LaneMask); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; @@ -1575,7 +1627,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Only PHI-defs can take different predecessor values. 
if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR); + report("Different value live out of predecessor", *PI, LR, Reg, + LaneMask); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() @@ -1588,18 +1641,36 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } } -void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) { - for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end(); - I != E; ++I) - verifyLiveRangeValue(LR, *I, Reg); +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, + unsigned LaneMask) { + for (const VNInfo *VNI : LR.valnos) + verifyLiveRangeValue(LR, VNI, Reg, LaneMask); for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) - verifyLiveRangeSegment(LR, I, Reg); + verifyLiveRangeSegment(LR, I, Reg, LaneMask); } void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { verifyLiveRange(LI, LI.reg); + unsigned Reg = LI.reg; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + unsigned Mask = 0; + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) + report("Lane masks of sub ranges overlap in live interval", MF, LI); + if ((SR.LaneMask & ~MaxMask) != 0) + report("Subrange lanemask is invalid", MF, LI); + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) + report("A Subrange is not covered by the main range", MF, LI); + } + } else if (LI.hasSubRanges()) { + report("subregister liveness only allowed for virtual registers", MF, LI); + } + // Check the LI only has one connected component. if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { ConnectedVNInfoEqClasses ConEQ(*LiveInts); diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 95a2934afb40..a1042e720c37 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "phi-opt" @@ -66,7 +67,7 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { return false; MRI = &Fn.getRegInfo(); - TII = Fn.getTarget().getInstrInfo(); + TII = Fn.getSubtarget().getInstrInfo(); // Find dead PHI cycles and PHI cycles that can be replaced by a single // value. InstCombine does these optimizations, but DAG legalization may @@ -91,7 +92,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned DstReg = MI->getOperand(0).getReg(); // See if we already saw this register. - if (!PHIsInCycle.insert(MI)) + if (!PHIsInCycle.insert(MI).second) return true; // Don't scan crazily complex things. @@ -136,7 +137,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { "PHI destination is not a virtual register"); // See if we already saw this register. - if (!PHIsInCycle.insert(MI)) + if (!PHIsInCycle.insert(MI).second) return true; // Don't scan crazily complex things. 
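(Editorial aside, not part of the patch: the subrange checks added to MachineVerifier::verifyLiveInterval in the hunk above boil down to a running-mask walk over the subranges. A minimal self-contained sketch of the two mask conditions, with hypothetical names:)

#include <cstdint>
#include <vector>

// Stand-in for the lane mask carried by a LiveInterval::SubRange.
struct SubRangeMask { uint32_t LaneMask; };

// Mirrors the two report() conditions: subrange lane masks must be
// pairwise disjoint and must stay within the vreg's maximum lane mask.
bool subRangeMasksValid(const std::vector<SubRangeMask> &SubRanges,
                        uint32_t MaxMask) {
  uint32_t Seen = 0;
  for (const SubRangeMask &SR : SubRanges) {
    if (Seen & SR.LaneMask)
      return false; // lane masks of subranges overlap
    if (SR.LaneMask & ~MaxMask)
      return false; // subrange lane mask is invalid for this vreg
    Seen |= SR.LaneMask;
  }
  return true;
}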
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c8d0819ad13c..def2e3d48ac0 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -150,9 +150,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { Changed |= EliminatePHINodes(MF, *I); // Remove dead IMPLICIT_DEF instructions. - for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(), - E = ImpDefs.end(); I != E; ++I) { - MachineInstr *DefMI = *I; + for (MachineInstr *DefMI : ImpDefs) { unsigned DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) @@ -240,7 +238,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (isSourceDefinedByImplicitDef(MPhi, MRI)) // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. @@ -369,7 +367,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. - if (!MBBsInsertedInto.insert(&opBlock)) + if (!MBBsInsertedInto.insert(&opBlock).second) continue; // If the copy has already been emitted, we're done. 
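// (Editorial aside, not part of the patch: SmallPtrSet::insert() now returns
// std::pair<iterator, bool> like the standard containers, which is why every
// old "if (!Set.insert(X))" dedup check in this patch becomes:
//   if (!MBBsInsertedInto.insert(&opBlock).second)
//     continue; // already emitted a copy for this block
// The .second flag is true only when the element was newly inserted.)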
// Find a safe location to insert the copy, this may be the first terminator diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h index 48234ae0fbdd..b997d7ac5f4f 100644 --- a/lib/CodeGen/PHIEliminationUtils.h +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H -#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H +#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 249b2d0f6bb9..28e9f847a9d7 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SymbolRewriter.h" using namespace llvm; @@ -71,6 +72,8 @@ static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, cl::desc("Disable Codegen Prepare")); static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, cl::desc("Disable Copy Propagation pass")); +static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining", + cl::Hidden, cl::desc("Disable Partial Libcall Inlining")); static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, @@ -97,6 +100,10 @@ static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); +static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis in CodeGen")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -228,8 +235,8 @@ TargetPassConfig::~TargetPassConfig() { // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), + Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), + Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -297,7 +304,7 @@ IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { /// a later pass or that it should stop after an earlier pass, then do not add /// the pass. Finally, compare the current pass against the StartAfter /// and StopAfter options and change the Started/Stopped flags accordingly. -void TargetPassConfig::addPass(Pass *P) { +void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { assert(!Initialized && "PassConfig is immutable"); // Cache the Pass ID here in case the pass manager finds this pass is @@ -306,10 +313,21 @@ void TargetPassConfig::addPass(Pass *P) { // and shouldn't reference it. AnalysisID PassID = P->getPassID(); - if (Started && !Stopped) + if (Started && !Stopped) { + std::string Banner; + // Construct banner message before PM->add() as that may delete the pass. 
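// (Editorial aside, not part of the patch: PM->add() takes ownership of P and
// may free it immediately, so reading P->getPassName() afterwards would be a
// use-after-free. The safe ordering, as the code below follows:
//   std::string Banner = std::string("After ") + std::string(P->getPassName());
//   PM->add(P);           // P may be deleted here
//   addPrintPass(Banner); // safe: Banner owns its own storage
// )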
+ if (AddingMachinePasses && (printAfter || verifyAfter)) + Banner = std::string("After ") + std::string(P->getPassName()); PM->add(P); - else + if (AddingMachinePasses) { + if (printAfter) + addPrintPass(Banner); + if (verifyAfter) + addVerifyPass(Banner); + } + } else { delete P; + } if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -323,7 +341,8 @@ /// /// addPass cannot return a pointer to the pass instance because it is internal to /// the PassManager and the instance we create here may already be freed. -AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { +AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, + bool printAfter) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) @@ -338,7 +357,7 @@ llvm_unreachable("Pass ID not registered"); } AnalysisID FinalID = P->getPassID(); - addPass(P); // Ends the lifetime of P. + addPass(P, verifyAfter, printAfter); // Ends the lifetime of P. // Add the passes after the pass P if there are any. for (SmallVectorImpl<std::pair<AnalysisID, IdentifyingPassPtr> >::iterator @@ -353,18 +372,25 @@ NP = Pass::createPass((*I).second.getID()); assert(NP && "Pass ID not registered"); } - addPass(NP); + addPass(NP, false, false); } } return FinalID; } -void TargetPassConfig::printAndVerify(const char *Banner) { +void TargetPassConfig::printAndVerify(const std::string &Banner) { + addPrintPass(Banner); + addVerifyPass(Banner); +} + +void TargetPassConfig::addPrintPass(const std::string &Banner) { if (TM->shouldPrintMachineCode()) - addPass(createMachineFunctionPrinterPass(dbgs(), Banner)); + PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} +void TargetPassConfig::addVerifyPass(const std::string &Banner) { if (VerifyMachineCode) - addPass(createMachineVerifierPass(Banner)); + PM->add(createMachineVerifierPass(Banner)); } /// Add common target configurable passes that perform LLVM IR to IR transforms @@ -374,7 +400,10 @@ void TargetPassConfig::addIRPasses() { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. + if (UseCFLAA) + addPass(createCFLAliasAnalysisPass()); addPass(createTypeBasedAliasAnalysisPass()); + addPass(createScopedNoAliasAAPass()); addPass(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. @@ -399,6 +428,9 @@ void TargetPassConfig::addIRPasses() { // Prepare expensive constants for SelectionDAG. if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); + + if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) + addPass(createPartiallyInlineLibCallsPass()); } /// Turn exception handling constructs into something the code generators can @@ -416,7 +448,8 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::WinEH: + case ExceptionHandling::ItaniumWinEH: + case ExceptionHandling::MSVC: // FIXME: Needs preparation.
addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: @@ -433,6 +466,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) addPass(createCodeGenPreparePass(TM)); + addPass(createRewriteSymbolsPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for @@ -477,6 +511,8 @@ void TargetPassConfig::addISelPrepare() { /// TODO: We could use a single addPre/Post(ID) hook to allow pass injection /// before/after any target-independent pass. But it's currently overkill. void TargetPassConfig::addMachinePasses() { + AddingMachinePasses = true; + // Insert a machine instr printer pass after the specified pass. // If -print-machineinstrs specified, print machineinstrs after all passes. if (StringRef(PrintMachineInstrs.getValue()).equals("")) @@ -485,7 +521,7 @@ void TargetPassConfig::addMachinePasses() { .equals("option-unspecified")) { const PassRegistry *PR = PassRegistry::getPassRegistry(); const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); - const PassInfo *IPI = PR->getPassInfo(StringRef("print-machineinstrs")); + const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); assert (TPI && IPI && "Pass ID not registered!"); const char *TID = (const char *)(TPI->getTypeInfo()); const char *IID = (const char *)(IPI->getTypeInfo()); @@ -496,8 +532,7 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. - if (addPass(&ExpandISelPseudosID)) - printAndVerify("After ExpandISelPseudos"); + addPass(&ExpandISelPseudosID); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -505,12 +540,11 @@ void TargetPassConfig::addMachinePasses() { } else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID, false); } // Run pre-ra passes. - if (addPreRegAlloc()) - printAndVerify("After PreRegAlloc passes"); + addPreRegAlloc(); // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. @@ -520,12 +554,10 @@ void TargetPassConfig::addMachinePasses() { addFastRegAlloc(createRegAllocPass(false)); // Run post-ra passes. - if (addPostRegAlloc()) - printAndVerify("After PostRegAlloc passes"); + addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... addPass(&PrologEpilogCodeInserterID); - printAndVerify("After PrologEpilogCodeInserter"); /// Add passes that optimize machine instructions after register allocation. if (getOptLevel() != CodeGenOpt::None) @@ -533,11 +565,9 @@ void TargetPassConfig::addMachinePasses() { // Expand pseudo instructions before second scheduling pass. addPass(&ExpandPostRAPseudosID); - printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. - if (addPreSched2()) - printAndVerify("After PreSched2 passes"); + addPreSched2(); // Second pass scheduler. 
if (getOptLevel() != CodeGenOpt::None) { @@ -545,63 +575,61 @@ void TargetPassConfig::addMachinePasses() { addPass(&PostMachineSchedulerID); else addPass(&PostRASchedulerID); - printAndVerify("After PostRAScheduler"); } // GC if (addGCPasses()) { if (PrintGCInfo) - addPass(createGCInfoPrinter(dbgs())); + addPass(createGCInfoPrinter(dbgs()), false, false); } // Basic block placement. if (getOptLevel() != CodeGenOpt::None) addBlockPlacement(); - if (addPreEmitPass()) - printAndVerify("After PreEmit passes"); + addPreEmitPass(); - addPass(&StackMapLivenessID); + addPass(&StackMapLivenessID, false); + + AddingMachinePasses = false; } /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(&EarlyTailDuplicateID)) - printAndVerify("After Pre-RegAlloc TailDuplicate"); + addPass(&EarlyTailDuplicateID); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - addPass(&OptimizePHIsID); + addPass(&OptimizePHIsID, false); // This pass merges large allocas. StackSlotColoring is a different pass // which merges spill slots. - addPass(&StackColoringID); + addPass(&StackColoringID, false); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. - addPass(&LocalStackSlotAllocationID); + addPass(&LocalStackSlotAllocationID, false); // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). addPass(&DeadMachineInstructionElimID); - printAndVerify("After codegen DCE pass"); // Allow targets to insert passes that improve instruction level parallelism, // like if-conversion. Such passes will typically need dominator trees and // loop info, just like LICM and CSE below. - if (addILPOpts()) - printAndVerify("After ILP optimizations"); + addILPOpts(); - addPass(&MachineLICMID); - addPass(&MachineCSEID); + addPass(&MachineLICMID, false); + addPass(&MachineCSEID, false); addPass(&MachineSinkingID); - printAndVerify("After Machine LICM, CSE and Sinking passes"); - addPass(&PeepholeOptimizerID); - printAndVerify("After codegen peephole optimization pass"); + addPass(&PeepholeOptimizerID, false); + // Clean up the dead code that may have been generated by peephole + // rewriting. + addPass(&DeadMachineInstructionElimID); } //===---------------------------------------------------------------------===// @@ -675,21 +703,26 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { return createTargetRegisterAllocator(Optimized); } +/// Return true if the default global register allocator is in use and +/// has not been overridden on the command line with '-regalloc=...' +bool TargetPassConfig::usingDefaultRegAlloc() const { + return RegAlloc.getNumOccurrences() == 0; +} + /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { - addPass(&PHIEliminationID); - addPass(&TwoAddressInstructionPassID); + addPass(&PHIEliminationID, false); + addPass(&TwoAddressInstructionPassID, false); addPass(RegAllocPass); - printAndVerify("After Register Allocation"); } /// Add standard target-independent passes that are tightly coupled with /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { - addPass(&ProcessImplicitDefsID); + addPass(&ProcessImplicitDefsID, false); // LiveVariables currently requires pure SSA form. // @@ -697,34 +730,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // LiveVariables can be removed completely, and LiveIntervals can be directly // computed. (We still either need to regenerate kill flags after regalloc, or // preferably fix the scavenger to not depend on them). - addPass(&LiveVariablesID); + addPass(&LiveVariablesID, false); // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID); - addPass(&PHIEliminationID); + addPass(&MachineLoopInfoID, false); + addPass(&PHIEliminationID, false); // Eventually, we want to run LiveIntervals before PHI elimination. if (EarlyLiveIntervals) - addPass(&LiveIntervalsID); + addPass(&LiveIntervalsID, false); - addPass(&TwoAddressInstructionPassID); + addPass(&TwoAddressInstructionPassID, false); addPass(&RegisterCoalescerID); // PreRA instruction scheduling. - if (addPass(&MachineSchedulerID)) - printAndVerify("After Machine Scheduling"); + addPass(&MachineSchedulerID); // Add the selected register allocation pass. addPass(RegAllocPass); - printAndVerify("After Register Allocation, before rewriter"); // Allow targets to change the register assignments before rewriting. - if (addPreRewrite()) - printAndVerify("After pre-rewrite passes"); + addPreRewrite(); // Finally rewrite virtual registers. addPass(&VirtRegRewriterID); - printAndVerify("After Virtual Register Rewriter"); // Perform stack slot coloring and post-ra machine LICM. // @@ -736,8 +765,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // // FIXME: can this move into MachineLateOptimization? addPass(&PostRAMachineLICMID); - - printAndVerify("After StackSlotColoring and postra Machine LICM"); } //===---------------------------------------------------------------------===// @@ -747,34 +774,30 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { /// Add passes that optimize machine instructions after register allocation. void TargetPassConfig::addMachineLateOptimization() { // Branch folding must be run after regalloc and prolog/epilog insertion. - if (addPass(&BranchFolderPassID)) - printAndVerify("After BranchFolding"); + addPass(&BranchFolderPassID); // Tail duplication. // Note that duplicating tail just increases code size and degrades // performance for targets that require Structured Control Flow. // In addition it can also make CFG irreducible. Thus we disable it. - if (!TM->requiresStructuredCFG() && addPass(&TailDuplicateID)) - printAndVerify("After TailDuplicate"); + if (!TM->requiresStructuredCFG()) + addPass(&TailDuplicateID); // Copy propagation. - if (addPass(&MachineCopyPropagationID)) - printAndVerify("After copy propagation pass"); + addPass(&MachineCopyPropagationID); } /// Add standard GC passes. 
bool TargetPassConfig::addGCPasses() { - addPass(&GCMachineCodeAnalysisID); + addPass(&GCMachineCodeAnalysisID, false); return true; } /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - if (addPass(&MachineBlockPlacementID)) { + if (addPass(&MachineBlockPlacementID, false)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); - - printAndVerify("After machine block placement."); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 716cb1f46f15..283d1f26198b 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -46,7 +46,7 @@ // if it loads to virtual registers and the virtual register defined has // a single use. // -// - Optimize Copies and Bitcast: +// - Optimize Copies and Bitcast (more generally, target specific copies): // // Rewrite copies and bitcasts to avoid cross register bank copies // when possible. @@ -78,6 +78,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <utility> using namespace llvm; #define DEBUG_TYPE "peephole-opt" @@ -92,7 +94,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); static cl::opt<bool> -DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true), +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); STATISTIC(NumReuse, "Number of extension results reused"); @@ -100,12 +102,13 @@ STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); -STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized"); +STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); +STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); namespace { class PeepholeOptimizer : public MachineFunctionPass { - const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; MachineDominatorTree *DT; // Machine dominator tree @@ -129,9 +132,15 @@ namespace { private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs); - bool optimizeSelect(MachineInstr *MI); + SmallPtrSetImpl<MachineInstr*> &LocalMIs); + bool optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeCondBranch(MachineInstr *MI); bool optimizeCopyOrBitcast(MachineInstr *MI); + bool optimizeCoalescableCopy(MachineInstr *MI); + bool optimizeUncoalescableCopy(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool findNextSource(unsigned &Reg, unsigned &SubReg); bool isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); @@ -140,6 +149,25 @@ namespace { DenseMap<unsigned, MachineInstr*> &ImmDefMIs); bool isLoadFoldable(MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); + + /// \brief Check whether \p MI is understood by the register coalescer + /// but may require some rewriting. 
+ bool isCoalescableCopy(const MachineInstr &MI) { + // SubregToRegs are not interesting, because they are already register + // coalescer friendly. + return MI.isCopy() || (!DisableAdvCopyOpt && + (MI.isRegSequence() || MI.isInsertSubreg() || + MI.isExtractSubreg())); + } + + /// \brief Check whether \p MI is a copy like instruction that is + /// not recognized by the register coalescer. + bool isUncoalescableCopy(const MachineInstr &MI) { + return MI.isBitcast() || + (!DisableAdvCopyOpt && + (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())); + } }; /// \brief Helper class to track the possible sources of a value defined by @@ -176,63 +204,87 @@ namespace { /// the ValueTracker class but that would have complicated the code of /// the users of this class. bool UseAdvancedTracking; - /// Optional MachineRegisterInfo used to perform some complex + /// MachineRegisterInfo used to perform tracking. + const MachineRegisterInfo &MRI; + /// Optional TargetInstrInfo used to perform some complex /// tracking. - const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; /// \brief Dispatcher to the right underlying implementation of /// getNextSource. - bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for Copy instructions. - bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for Bitcast instructions. - bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for RegSequence /// instructions. - bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. - bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. - bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for SubregToReg /// instructions. - bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); public: - /// \brief Create a ValueTracker instance for the value defines by \p MI - /// at the operand index \p DefIdx. + /// \brief Create a ValueTracker instance for the value defined by \p Reg. /// \p DefSubReg represents the sub register index the value tracker will - /// track. It does not need to match the sub register index used in \p MI. + /// track. It does not need to match the sub register index used in the + /// definition of \p Reg. /// \p UseAdvancedTracking specifies whether or not the value tracker looks /// through complex instructions. By default (false), it handles only copy /// and bitcast instructions. - /// \p MRI useful to perform some complex checks. + /// If \p Reg is a physical register, a value tracker constructed with + /// this constructor will not find any alternative source. 
+ /// Indeed, when \p Reg is a physical register that constructor does not + /// know which definition of \p Reg it should track. + /// Use the next constructor to track a physical register. + ValueTracker(unsigned Reg, unsigned DefSubReg, + const MachineRegisterInfo &MRI, + bool UseAdvancedTracking = false, + const TargetInstrInfo *TII = nullptr) + : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); + } + } + + /// \brief Create a ValueTracker instance for the value defined by + /// the pair \p MI, \p DefIdx. + /// Unlike the other constructor, the value tracker produced by this one + /// may be able to find a new source when the definition is a physical + /// register. + /// This could be useful to rewrite target specific instructions into + /// generic copy instructions. ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + const MachineRegisterInfo &MRI, bool UseAdvancedTracking = false, - const MachineRegisterInfo *MRI = nullptr) + const TargetInstrInfo *TII = nullptr) : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), - UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) { - assert(Def->getOperand(DefIdx).isDef() && - Def->getOperand(DefIdx).isReg() && - "Definition does not match machine instruction"); - // Initially the value is in the defined register. + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + assert(DefIdx < Def->getDesc().getNumDefs() && + Def->getOperand(DefIdx).isReg() && "Invalid definition"); Reg = Def->getOperand(DefIdx).getReg(); } /// \brief Following the use-def chain, get the next available source /// for the tracked value. - /// When the returned value is not nullptr, getReg() gives the register + /// When the returned value is not nullptr, \p SrcReg gives the register /// that contain the tracked value. /// \note The sub register index returned in \p SrcSubReg must be used - /// on that getReg() to access the actual value. + /// on \p SrcReg to access the actual value. /// \return Unless the returned value is nullptr (i.e., no source found), - /// \p SrcIdx gives the index of the next source in the returned - /// instruction and \p SrcSubReg the index to be used on that source to - /// get the tracked value. When nullptr is returned, no alternative source - /// has been found. - const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \p SrcReg gives the register of the next source used in the returned + /// instruction and \p SrcSubReg the sub-register index to be used on that + /// source to get the tracked value. When nullptr is returned, no + /// alternative source has been found. + const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Get the last register where the initial value can be found. /// Initially this is the register of the definition. @@ -261,7 +313,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", /// debug uses. 
bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet<MachineInstr*, 8> &LocalMIs) { + SmallPtrSetImpl<MachineInstr*> &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; @@ -277,7 +329,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Ensure DstReg can get a register class that actually supports // sub-registers. Don't change the class until we commit. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); + DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); if (!DstRC) return false; @@ -286,8 +338,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // register. // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. - bool UseSrcSubIdx = TM->getRegisterInfo()-> - getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; + bool UseSrcSubIdx = + TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. @@ -431,7 +483,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, } /// Optimize a select instruction. -bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { +bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs) { unsigned TrueOp = 0; unsigned FalseOp = 0; bool Optimizable = false; @@ -440,13 +493,19 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { return false; if (!Optimizable) return false; - if (!TII->optimizeSelect(MI)) + if (!TII->optimizeSelect(MI, LocalMIs)) return false; MI->eraseFromParent(); ++NumSelects; return true; } +/// \brief Check if a simpler conditional branch can be generated. +bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { + return TII->optimizeCondBranch(MI); +} + /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) /// share the same register file. static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, @@ -477,85 +536,34 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; } -/// \brief Get the index of the definition and source for \p Copy -/// instruction. -/// \pre Copy.isCopy() or Copy.isBitcast(). -/// \return True if the Copy instruction has only one register source -/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx -/// are invalid. -static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy, - unsigned &DefIdx, unsigned &SrcIdx) { - assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type."); - if (Copy.isCopy()) { - // Copy instruction are supposed to be: Def = Src. - if (Copy.getDesc().getNumOperands() != 2) - return false; - DefIdx = 0; - SrcIdx = 1; - assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!"); - return true; - } - // Bitcast case. - // Bitcasts with more than one def are not supported. - if (Copy.getDesc().getNumDefs() != 1) - return false; - // Initialize SrcIdx to an undefined operand. - SrcIdx = Copy.getDesc().getNumOperands(); - for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) { - const MachineOperand &MO = Copy.getOperand(OpIdx); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef()) - DefIdx = OpIdx; - else if (SrcIdx != EndOpIdx) - // Multiple sources?
- return false; - SrcIdx = OpIdx; - } - return true; -} - -/// \brief Optimize a copy or bitcast instruction to avoid cross -/// register bank copy. The optimization looks through a chain of -/// copies and try to find a source that has a compatible register -/// class. -/// Two register classes are considered to be compatible if they share -/// the same register bank. -/// New copies issued by this optimization are register allocator -/// friendly. This optimization does not remove any copy as it may -/// overconstraint the register allocator, but replaces some when -/// possible. -/// \pre \p MI is a Copy (MI->isCopy() is true) -/// \return True, when \p MI has been optimized. In that case, \p MI has -/// been removed from its parent. -bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { - unsigned DefIdx, SrcIdx; - if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx)) +/// \brief Try to find the next source that shares the same register file +/// for the value defined by \p Reg and \p SubReg. +/// When true is returned, \p Reg and \p SubReg are updated with the +/// register number and sub-register index of the new source. +/// \return False if no alternative sources are available. True otherwise. +bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { + // Do not try to find a new source for a physical register. + // So far we do not have any motivating example for doing that. + // Thus, instead of maintaining untested code, we will revisit that if + // that changes at some point. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; - const MachineOperand &MODef = MI->getOperand(DefIdx); - assert(MODef.isReg() && "Copies must be between registers."); - unsigned Def = MODef.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(Def)) - return false; - - const TargetRegisterClass *DefRC = MRI->getRegClass(Def); - unsigned DefSubReg = MODef.getSubReg(); + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + unsigned DefSubReg = SubReg; unsigned Src; unsigned SrcSubReg; bool ShouldRewrite = false; - const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); // Follow the chain of copies until we reach the top of the use-def chain // or find a more suitable source. - ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI); + ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); do { - unsigned CopySrcIdx, CopySrcSubReg; - if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg)) + unsigned CopySrcReg, CopySrcSubReg; + if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) break; - Src = ValTracker.getReg(); + Src = CopySrcReg; SrcSubReg = CopySrcSubReg; // Do not extend the live-ranges of physical registers as they add @@ -569,29 +577,411 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); // If this source does not incur a cross register bank copy, use it. - ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, + ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, SrcSubReg); } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. - if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) + if (!ShouldRewrite || Src == Reg) + return false; + + Reg = Src; + SubReg = SrcSubReg; + return true; +} + +namespace { +/// \brief Helper class to rewrite the arguments of a copy-like instruction.
+class CopyRewriter { +protected: + /// The copy-like instruction. + MachineInstr &CopyLike; + /// The index of the source being rewritten. + unsigned CurrentSrcIdx; + +public: + CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} + + virtual ~CopyRewriter() {} + + /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and + /// the related value that it affects (TrackReg, TrackSubReg). + /// A source is considered rewritable if its register class and the + /// register class of the related TrackReg may not be register + /// coalescer friendly. In other words, given a copy-like instruction, + /// not all the arguments may be returned as rewritable sources, since + /// some arguments are known to be register coalescer friendly. + /// + /// Each call of this method moves the current source to the next + /// rewritable source. + /// For instance, let CopyLike be the instruction to rewrite. + /// CopyLike has one definition and one source: + /// dst.dstSubIdx = CopyLike src.srcSubIdx. + /// + /// The first call will give the first rewritable source, i.e., + /// the only source this instruction has: + /// (SrcReg, SrcSubReg) = (src, srcSubIdx). + /// This source defines the whole definition, i.e., + /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). + /// + /// The second and subsequent calls will return false, as there is only one + /// rewritable source. + /// + /// \return True if a rewritable source has been found, false otherwise. + /// The output arguments are valid if and only if true is returned. + virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) { + // If CurrentSrcIdx == 1, this means this function has already been + // called once. CopyLike has one definition and one argument, thus, + // there is nothing else to rewrite. + if (!CopyLike.isCopy() || CurrentSrcIdx == 1) + return false; + // This is the first call to getNextRewritableSource. + // Move the CurrentSrcIdx to remember that we made that call. + CurrentSrcIdx = 1; + // The rewritable source is the argument. + const MachineOperand &MOSrc = CopyLike.getOperand(1); + SrcReg = MOSrc.getReg(); + SrcSubReg = MOSrc.getSubReg(); + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + /// \brief Rewrite the current source with \p NewReg and \p NewSubReg + /// if possible. + /// \return True if the rewriting was possible, false otherwise. + virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { + if (!CopyLike.isCopy() || CurrentSrcIdx != 1) + return false; + MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); + MOSrc.setReg(NewReg); + MOSrc.setSubReg(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for INSERT_SUBREG instruction. +class InsertSubregRewriter : public CopyRewriter { +public: + InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isInsertSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. + /// Src1 has the same register class as dst, hence, there is + /// nothing to rewrite. + /// Src2.src2SubIdx may not be register coalescer friendly. + /// Therefore, the first call to this method returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
+ /// (TrackReg, TrackSubReg) = (dst, subIdx). + /// + /// Subsequent calls will return false. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we have already returned the only source we can rewrite, return false. + if (CurrentSrcIdx == 2) + return false; + // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. + CurrentSrcIdx = 2; + const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); + SrcReg = MOInsertedReg.getReg(); + SrcSubReg = MOInsertedReg.getSubReg(); + const MachineOperand &MODef = CopyLike.getOperand(0); + + // We want to track something that is compatible with the + // partial definition. + TrackReg = MODef.getReg(); + if (MODef.getSubReg()) + // Bail out if we have to compose sub-register indices. + return false; + TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); + return true; + } + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 2) + return false; + // We are rewriting the inserted reg. + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. +class ExtractSubregRewriter : public CopyRewriter { + const TargetInstrInfo &TII; + +public: + ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) + : CopyRewriter(MI), TII(TII) { + assert(MI.isExtractSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. + /// There is only one rewritable source: Src.subIdx, + /// which defines dst.dstSubIdx. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we have already returned the only source we can rewrite, return false. + if (CurrentSrcIdx == 1) + return false; + // We are looking at v1 = EXTRACT_SUBREG v0, sub0. + CurrentSrcIdx = 1; + const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); + SrcReg = MOExtractedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if (MOExtractedReg.getSubReg()) + return false; + + SrcSubReg = CopyLike.getOperand(2).getImm(); + + // We want to track something that is compatible with the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // The only source we can rewrite is the input register. + if (CurrentSrcIdx != 1) + return false; + + CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); + + // If we find a source that does not require extracting anything, + // rewrite the operation with a copy. + if (!NewSubReg) { + // Move the current index to an invalid position. + // We do not want another call to this method to be able + // to make any change. + CurrentSrcIdx = -1; + // Rewrite the operation as a COPY. + // Get rid of the sub-register index. + CopyLike.RemoveOperand(2); + // Morph the operation into a COPY. + CopyLike.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } + CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for REG_SEQUENCE instruction.
+class RegSequenceRewriter : public CopyRewriter { +public: + RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isRegSequence() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. + /// Each call will return a different source, walking all the available + /// sources. + /// + /// The first call returns: + /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx1). + /// + /// The second call returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx2). + /// + /// And so on, until all the sources have been traversed, then + /// it returns false. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. + + // If this is the first call, move to the first argument. + if (CurrentSrcIdx == 0) { + CurrentSrcIdx = 1; + } else { + // Otherwise, move to the next argument and check that it is valid. + CurrentSrcIdx += 2; + if (CurrentSrcIdx >= CopyLike.getNumOperands()) + return false; + } + const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); + SrcReg = MOInsertedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if ((SrcSubReg = MOInsertedReg.getSubReg())) + return false; + + // We want to track something that is compatible with the related + // partial definition. + TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + // If we have to compose sub-registers, bail out. + return MODef.getSubReg() == 0; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // We cannot rewrite out-of-bounds operands. + // Moreover, rewritable sources are at odd positions. + if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) + return false; + + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; +} // End namespace. + +/// \brief Get the appropriate CopyRewriter for \p MI. +/// \return A pointer to a dynamically allocated CopyRewriter or nullptr +/// if no rewriter works for \p MI. +static CopyRewriter *getCopyRewriter(MachineInstr &MI, + const TargetInstrInfo &TII) { + switch (MI.getOpcode()) { + default: + return nullptr; + case TargetOpcode::COPY: + return new CopyRewriter(MI); + case TargetOpcode::INSERT_SUBREG: + return new InsertSubregRewriter(MI); + case TargetOpcode::EXTRACT_SUBREG: + return new ExtractSubregRewriter(MI, TII); + case TargetOpcode::REG_SEQUENCE: + return new RegSequenceRewriter(MI); + } + llvm_unreachable(nullptr); +} + +/// \brief Optimize generic copy instructions to avoid cross +/// register bank copy. The optimization looks through a chain of +/// copies and tries to find a source that has a compatible register +/// class. +/// Two register classes are considered to be compatible if they share +/// the same register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstrain the register allocator, but replaces some operands +/// when possible. +/// \pre isCoalescableCopy(*MI) is true. +/// \return True when \p MI has been rewritten.
+/// False otherwise.
+bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
+  assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
+  assert(MI->getDesc().getNumDefs() == 1 &&
+         "Coalescer can understand multiple defs?!");
+  const MachineOperand &MODef = MI->getOperand(0);
+  // Do not rewrite physical definitions.
+  if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
     return false;
 
-  // Rewrite the copy to avoid a cross register bank penalty.
-  unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def :
-    MRI->createVirtualRegister(DefRC);
-  MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
-                                  TII->get(TargetOpcode::COPY), NewVR)
-    .addReg(Src, 0, SrcSubReg);
-  NewCopy->getOperand(0).setSubReg(DefSubReg);
-
-  MRI->replaceRegWith(Def, NewVR);
-  MRI->clearKillFlags(NewVR);
-  // We extended the lifetime of Src.
-  // Clear the kill flags to account for that.
-  MRI->clearKillFlags(Src);
+  bool Changed = false;
+  // Get the right rewriter for the current copy.
+  std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
+  // If none exists, bail out.
+  if (!CpyRewriter)
+    return false;
+  // Rewrite each rewritable source.
+  unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
+  while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
+                                              TrackSubReg)) {
+    unsigned NewSrc = TrackReg;
+    unsigned NewSubReg = TrackSubReg;
+    // Try to find a more suitable source.
+    // If we fail to do so, or we get the original source back,
+    // move on to the next source.
+    if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
+      continue;
+    // Rewrite source.
+    if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
+      // We may have extended the live-range of NewSrc, account for that.
+      MRI->clearKillFlags(NewSrc);
+      Changed = true;
+    }
+  }
+  // TODO: We could have a clean-up method to tidy the instruction.
+  // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
+  //    => v0 = COPY v1
+  // Currently we have not seen a motivating example for that and we
+  // want to avoid untested code.
+  NumRewrittenCopies += Changed == true;
+  return Changed;
+}
+
+/// \brief Optimize copy-like instructions to create
+/// register-coalescer-friendly instructions.
+/// The optimization tries to kill off \p MI by looking
+/// through a chain of copies to find a source that has a compatible
+/// register class.
+/// If such a source is found, it replaces \p MI with a generic COPY
+/// operation.
+/// \pre isUncoalescableCopy(*MI) is true.
+/// \return True, when \p MI has been optimized. In that case, \p MI has
+/// been removed from its parent.
+/// All COPY instructions created are inserted into \p LocalMIs.
+bool PeepholeOptimizer::optimizeUncoalescableCopy(
+    MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
+  assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
+
+  // Check if we can rewrite all the values defined by this instruction.
+  SmallVector<
+      std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
+      4> RewritePairs;
+  for (const MachineOperand &MODef : MI->defs()) {
+    if (MODef.isDead())
+      // We can ignore those.
+      continue;
+
+    // If a physical register is here, this is probably for a good reason.
+    // Do not rewrite that.
+    if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+      return false;
+
+    // If we do not know how to rewrite this definition, there is no point
+    // in trying to kill this instruction.
+ TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); + TargetInstrInfo::RegSubRegPair Src = Def; + if (!findNextSource(Src.Reg, Src.SubReg)) + return false; + RewritePairs.push_back(std::make_pair(Def, Src)); + } + // The change is possible for all defs, do it. + for (const auto &PairDefSrc : RewritePairs) { + const auto &Def = PairDefSrc.first; + const auto &Src = PairDefSrc.second; + // Rewrite the "copy" in a way the register coalescer understands. + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); + unsigned NewVR = MRI->createVirtualRegister(DefRC); + MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + NewVR).addReg(Src.Reg, 0, Src.SubReg); + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + LocalMIs.insert(NewCopy); + MRI->replaceRegWith(Def.Reg, NewVR); + MRI->clearKillFlags(NewVR); + // We extended the lifetime of Src. + // Clear the kill flags to account for that. + MRI->clearKillFlags(Src.Reg); + } + // MI is now dead. MI->eraseFromParent(); - ++NumCopiesBitcasts; + ++NumUncoalescableCopies; return true; } @@ -673,8 +1063,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; @@ -684,7 +1074,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; - SmallPtrSet<MachineInstr*, 8> LocalMIs; + + // During this forward scan, at some point it needs to answer the question + // "given a pointer to an MI in the current BB, is it located before or + // after the current instruction". + // To perform this, the following set keeps track of the MIs already seen + // during the scan, if a MI is not in the set, it is assumed to be located + // after. Newly created MIs have to be inserted in the set as well. + SmallPtrSet<MachineInstr*, 16> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; SmallSet<unsigned, 16> FoldAsLoadDefCandidates; @@ -711,15 +1108,27 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefCandidates.clear(); - if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || + if ((isUncoalescableCopy(*MI) && + optimizeUncoalescableCopy(MI, LocalMIs)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || - (MI->isSelect() && optimizeSelect(MI))) { + (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); Changed = true; continue; } + if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { + Changed = true; + continue; + } + + if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { + // MI is just rewritten. 
+      Changed = true;
+      continue;
+    }
+
     if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
       SeenMoveImm = true;
     } else {
@@ -781,24 +1190,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
-bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
                                          unsigned &SrcSubReg) {
   assert(Def->isCopy() && "Invalid definition");
   // Copy instructions are supposed to be: Def = Src.
   // If someone breaks this assumption, bad things will happen everywhere.
-  assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+  assert(Def->getNumOperands() == 2 && "Invalid number of operands");
 
   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
     // If we look for a different subreg, it means we want a subreg of src.
     // Bail out as we do not support composing subregs yet.
     return false;
   // Otherwise, we want the whole source.
-  SrcIdx = 1;
-  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  const MachineOperand &Src = Def->getOperand(1);
+  SrcReg = Src.getReg();
+  SrcSubReg = Src.getSubReg();
   return true;
 }
 
-bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
                                             unsigned &SrcSubReg) {
   assert(Def->isBitcast() && "Invalid definition");
 
@@ -814,7 +1224,7 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
     // Bail out as we do not support composing subregs yet.
     return false;
 
-  SrcIdx = Def->getDesc().getNumOperands();
+  unsigned SrcIdx = Def->getNumOperands();
   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
        ++OpIdx) {
     const MachineOperand &MO = Def->getOperand(OpIdx);
@@ -826,13 +1236,16 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
       return false;
     SrcIdx = OpIdx;
   }
-  SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  const MachineOperand &Src = Def->getOperand(SrcIdx);
+  SrcReg = Src.getReg();
+  SrcSubReg = Src.getSubReg();
   return true;
 }
 
-bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
                                                 unsigned &SrcSubReg) {
-  assert(Def->isRegSequence() && "Invalid definition");
+  assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
+         "Invalid definition");
 
   if (Def->getOperand(DefIdx).getSubReg())
     // If we are composing subreg, bail out.
    return false;
@@ -851,19 +1264,26 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
   // turn that into an assertion.
   return false;
 
+  if (!TII)
+    // We could handle the REG_SEQUENCE here, but we do not want to
+    // duplicate the code from the generic TII.
+    return false;
+
+  SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
+  if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
+    return false;
+
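In effect, getRegSequenceInputs flattens the REG_SEQUENCE (or a target-specific equivalent) into (Reg, SubReg, SubIdx) triples, and the loop below simply scans them for the lane being tracked. A standalone sketch of that lookup, with simplified types and hypothetical names (InputReg, findDefiningInput), not the LLVM API:

    #include <vector>

    // Simplified stand-in for TargetInstrInfo::RegSubRegPairAndIdx.
    struct InputReg {
      unsigned Reg;    // source register
      unsigned SubReg; // sub-register index on the source, 0 if none
      unsigned SubIdx; // lane of the sequence this input defines
    };

    // Pick the input that defines DefSubReg, refusing to compose
    // sub-register indices, exactly like the loop that follows.
    bool findDefiningInput(const std::vector<InputReg> &Inputs,
                           unsigned DefSubReg, unsigned &SrcReg,
                           unsigned &SrcSubReg) {
      for (const InputReg &I : Inputs) {
        if (I.SubIdx != DefSubReg)
          continue;
        if (I.SubReg) // would require composing sub-registers
          return false;
        SrcReg = I.Reg;
        SrcSubReg = I.SubReg; // always 0 here
        return true;
      }
      return false; // the lane is not defined by a single input
    }

   // We are looking at:
   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
   // Check if one of the operands defines the subreg we are interested in.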
-  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
-       OpIdx != EndOpIdx; OpIdx += 2) {
-    const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
-    assert(MOSubIdx.isImm() &&
-           "One of the subindex of the reg_sequence is not an immediate");
-    if (MOSubIdx.getImm() == DefSubReg) {
-      assert(Def->getOperand(OpIdx).isReg() &&
-             "One of the source of the reg_sequence is not a register");
-      SrcIdx = OpIdx;
-      SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  for (auto &RegSeqInput : RegSeqInputRegs) {
+    if (RegSeqInput.SubIdx == DefSubReg) {
+      if (RegSeqInput.SubReg)
+        // Bail out if we have to compose sub-registers.
+        return false;
+
+      SrcReg = RegSeqInput.Reg;
+      SrcSubReg = RegSeqInput.SubReg;
       return true;
     }
   }
@@ -874,61 +1294,68 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
   return false;
 }
 
-bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
                                                  unsigned &SrcSubReg) {
-  assert(Def->isInsertSubreg() && "Invalid definition");
+  assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
+         "Invalid definition");
+
   if (Def->getOperand(DefIdx).getSubReg())
     // If we are composing subreg, bail out.
    // Same remark as getNextSourceFromRegSequence.
    // I.e., this may be turned into an assert.
     return false;
 
+  if (!TII)
+    // We could handle INSERT_SUBREG here, but we do not want to
+    // duplicate the code from the generic TII.
+    return false;
+
+  TargetInstrInfo::RegSubRegPair BaseReg;
+  TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
+  if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
+    return false;
+
   // We are looking at:
   // Def = INSERT_SUBREG v0, v1, sub1
   // There are two cases:
   // 1. DefSubReg == sub1, get v1.
   // 2. DefSubReg != sub1, the value may be available through v0.
 
-  // #1 Check if the inserted register matches the require sub index.
-  unsigned InsertedSubReg = Def->getOperand(3).getImm();
-  if (InsertedSubReg == DefSubReg) {
-    SrcIdx = 2;
-    SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+  // #1 Check if the inserted register matches the required sub index.
+  if (InsertedReg.SubIdx == DefSubReg) {
+    SrcReg = InsertedReg.Reg;
+    SrcSubReg = InsertedReg.SubReg;
     return true;
   }
   // #2 Otherwise, if the sub register we are looking for is not partially
   // defined by the inserted element, we can look through the main
   // register (v0).
-  // To check the overlapping we need a MRI and a TRI.
-  if (!MRI)
-    return false;
-
   const MachineOperand &MODef = Def->getOperand(DefIdx);
-  const MachineOperand &MOBase = Def->getOperand(1);
   // If the result register (Def) and the base register (v0) do not
   // have the same register class or if we have to compose
   // subregisters, bail out.
-  if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
-      MOBase.getSubReg())
+  if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
+      BaseReg.SubReg)
    return false;
 
-  // Get the TRI and check if inserted sub register overlaps with the
-  // sub register we are tracking.
-  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
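The overlap test that follows is a lane-mask intersection: every sub-register index maps to a bitmask of lanes of the full register, and the tracked value survives the insertion exactly when the two masks are disjoint. A toy illustration with made-up mask values (valueSurvivesInsert is a hypothetical name; the real masks come from TRI->getSubRegIndexLaneMask):

    #include <cstdint>

    // Hypothetical lane masks for two sub-register indices of the
    // same register.
    const uint64_t TrackedLanes  = 0x3; // e.g. lanes {0,1}
    const uint64_t InsertedLanes = 0xC; // e.g. lanes {2,3}

    // The value tracked through the base register is still intact if
    // the inserted lanes do not touch the lanes we are tracking.
    bool valueSurvivesInsert(uint64_t Tracked, uint64_t Inserted) {
      return (Tracked & Inserted) == 0; // true for the masks above
    }

+  // Get the TRI and check if the inserted sub-register overlaps with the
+  // sub-register we are tracking.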
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); if (!TRI || (TRI->getSubRegIndexLaneMask(DefSubReg) & - TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0) + TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) return false; // At this point, the value is available in v0 via the same subreg // we used for Def. - SrcIdx = 1; + SrcReg = BaseReg.Reg; SrcSubReg = DefSubReg; return true; } -bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg) { - assert(Def->isExtractSubreg() && "Invalid definition"); + assert((Def->isExtractSubreg() || + Def->isExtractSubregLike()) && "Invalid definition"); // We are looking at: // Def = EXTRACT_SUBREG v0, sub0 @@ -937,17 +1364,26 @@ bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, if (DefSubReg) return false; + if (!TII) + // We could handle the EXTRACT_SUBREG here, but we do not want to + // duplicate the code from the generic TII. + return false; + + TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; + if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) + return false; + // Bails if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. - if (Def->getOperand(1).getSubReg()) + if (ExtractSubregInputReg.SubReg) return false; // Otherwise, the value is available in the v0.sub0. - SrcIdx = 1; - SrcSubReg = Def->getOperand(2).getImm(); + SrcReg = ExtractSubregInputReg.Reg; + SrcSubReg = ExtractSubregInputReg.SubIdx; return true; } -bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def->isSubregToReg() && "Invalid definition"); // We are looking at: @@ -964,37 +1400,37 @@ bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, if (Def->getOperand(2).getSubReg()) return false; - SrcIdx = 2; + SrcReg = Def->getOperand(2).getReg(); SrcSubReg = Def->getOperand(3).getImm(); return true; } -bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) { +bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def && "This method needs a valid definition"); assert( (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); if (Def->isCopy()) - return getNextSourceFromCopy(SrcIdx, SrcSubReg); + return getNextSourceFromCopy(SrcReg, SrcSubReg); if (Def->isBitcast()) - return getNextSourceFromBitcast(SrcIdx, SrcSubReg); + return getNextSourceFromBitcast(SrcReg, SrcSubReg); // All the remaining cases involve "complex" instructions. // Bails if we did not ask for the advanced tracking. 
if (!UseAdvancedTracking) return false; - if (Def->isRegSequence()) - return getNextSourceFromRegSequence(SrcIdx, SrcSubReg); - if (Def->isInsertSubreg()) - return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg); - if (Def->isExtractSubreg()) - return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg); + if (Def->isRegSequence() || Def->isRegSequenceLike()) + return getNextSourceFromRegSequence(SrcReg, SrcSubReg); + if (Def->isInsertSubreg() || Def->isInsertSubregLike()) + return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); + if (Def->isExtractSubreg() || Def->isExtractSubregLike()) + return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); if (Def->isSubregToReg()) - return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg); + return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); return false; } -const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, +const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, unsigned &SrcSubReg) { // If we reach a point where we cannot move up in the use-def chain, // there is nothing we can get. @@ -1003,20 +1439,18 @@ const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, const MachineInstr *PrevDef = nullptr; // Try to find the next source. - if (getNextSourceImpl(SrcIdx, SrcSubReg)) { + if (getNextSourceImpl(SrcReg, SrcSubReg)) { // Update definition, definition index, and subregister for the // next call of getNextSource. - const MachineOperand &MO = Def->getOperand(SrcIdx); - assert(MO.isReg() && !MO.isDef() && "Source is invalid"); // Update the current register. - Reg = MO.getReg(); + Reg = SrcReg; // Update the return value before moving up in the use-def chain. PrevDef = Def; // If we can still move up in the use-def chain, move to the next // defintion. if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { - Def = MRI->getVRegDef(Reg); - DefIdx = MRI->def_begin(Reg).getOperandNo(); + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); DefSubReg = SrcSubReg; return PrevDef; } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index a1ab3445fa4f..89e1d113ff70 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -41,7 +41,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -98,7 +97,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &Fn) override; - + bool enablePostRAScheduler( const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode &Mode, @@ -137,10 +136,10 @@ namespace { public: SchedulePostRATDList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, const RegisterClassInfo&, - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs); + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); ~SchedulePostRATDList(); @@ -193,16 +192,17 @@ INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList::SchedulePostRATDList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, const 
RegisterClassInfo &RCI, - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) { - - const TargetMachine &TM = MF.getTarget(); - const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + + const InstrItineraryData *InstrItins = + MF.getSubtarget().getInstrItineraryData(); HazardRec = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( + InstrItins, this); assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || MRI.tracksLiveness()) && @@ -265,9 +265,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (skipOptnoneFunction(*Fn.getFunction())) return false; - TII = Fn.getTarget().getInstrInfo(); + TII = Fn.getSubtarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); @@ -301,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode, + SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode, CriticalPathRCs); // Loop over all of the basic blocks @@ -560,10 +559,10 @@ void SchedulePostRATDList::ListScheduleTopDown() { if (HT == ScheduleHazardRecognizer::NoHazard) { if (HazardRec->ShouldPreferAnother(CurSUnit)) { if (!NotPreferredSUnit) { - // If this is the first non-preferred node for this cycle, then - // record it and continue searching for a preferred node. If this - // is not the first non-preferred node, then treat it as though - // there had been a hazard. + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. 
NotPreferredSUnit = CurSUnit; continue; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 31299272a7a2..b1538006e724 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -138,8 +139,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); assert(WorkList.empty() && "Inconsistent worklist state"); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b98d210e9d57..385e5a35afba 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -110,8 +111,8 @@ typedef SmallSetVector<int, 8> StackObjSet; /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); @@ -185,8 +186,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; @@ -239,8 +240,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { /// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { - const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo(); - const TargetFrameLowering *TFI = F.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = F.getFrameInfo(); // Get the callee saved register list... 
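The PrologEpilogInserter hunks above and below repeat the mechanical migration seen throughout this commit: the per-function TargetSubtargetInfo replaces the TargetMachine as the place to fetch the instruction, register, and frame-lowering tables. Schematically (both calls appear verbatim in the hunks; the second variable name is only for illustration):

    // Before: one set of tables per TargetMachine.
    const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
    // After: tables come from the subtarget selected for this function,
    // so per-function attributes such as CPU features are honored.
    const TargetFrameLowering *STI_TFI = Fn.getSubtarget().getFrameLowering();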
@@ -337,9 +338,9 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { if (CSI.empty()) return; - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); MachineBasicBlock::iterator I; // Spill using target interface. @@ -445,7 +446,7 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs, /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis<StackProtector>(); bool StackGrowsDown = @@ -515,7 +516,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool EarlyScavengingSlots = (TFI.hasFP(Fn) && TFI.isFPCloseToIncomingSP() && RegInfo->useFPForScavengingIndex(Fn) && @@ -670,7 +671,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { /// prolog and epilog code to the function. /// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... TFI.emitPrologue(Fn); @@ -710,8 +711,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { SmallPtrSet<MachineBasicBlock*, 8> Reachable; // Iterate over the reachable blocks in DFS order. - for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > - DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); DFI != DFE; ++DFI) { int SPAdj = 0; // Check the exit state of the DFS stack predecessor. 
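The replaceFrameIndices traversal below threads an SPAdj value through the CFG: each block is visited depth-first and starts from the adjustment recorded when its DFS stack predecessor was left. A simplified standalone model of that bookkeeping (hypothetical Block/walk names; the real code reads the exit state of the DFS stack predecessor via df_ext_iterator rather than recursing):

    #include <set>
    #include <vector>

    struct Block {
      std::vector<int> SPDeltas;       // per-instruction SP adjustments
      std::vector<Block *> Successors;
    };

    // Visit reachable blocks depth-first, handing each successor the
    // stack adjustment this block exits with.
    void walk(Block *B, int SPAdjOnEntry, std::set<Block *> &Reachable) {
      if (!Reachable.insert(B).second)
        return;
      int SPAdj = SPAdjOnEntry;
      for (int Delta : B->SPDeltas)
        SPAdj += Delta; // cf. SPAdj += TII.getSPAdjust(I) below
      for (Block *Succ : B->Successors)
        walk(Succ, SPAdj, Reachable);
    }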
@@ -738,31 +738,24 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj) { - const TargetMachine &TM = Fn.getTarget(); - assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - bool StackGrowsDown = - TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + assert(Fn.getSubtarget().getRegisterInfo() && + "getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + bool InsideCallSequence = false; + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; + InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); + SPAdj += TII.getSPAdjust(I); MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = std::prev(I); @@ -776,6 +769,13 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } + // If we are looking at a call sequence, we need to keep track of + // the SP adjustment made by each instruction in the sequence. + // This includes both the frame setup/destroy pseudos (handled above), + // as well as other instructions that have side effects w.r.t the SP. + if (InsideCallSequence) + SPAdj += TII.getSPAdjust(I); + MachineInstr *MI = I; bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -797,6 +797,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, continue; } + // TODO: This code should be commoned with the code for + // PATCHPOINT. There's no good reason for the difference in + // implementation other than historical accident. The only + // remaining difference is the unconditional use of the stack + // pointer as the base register. + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI->isDebugValue() || i == 0) && + "Frame indicies can only appear as the first operand of a " + "DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(i + 1); + const unsigned refOffset = + TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), + Reg); + + Offset.setImm(Offset.getImm() + refOffset); + MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // Frame allocations are target independent. Simply swap the index with + // the offset. + if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) { + assert(TFI->hasFP(Fn) && "frame alloc requires FP"); + MachineOperand &FI = MI->getOperand(i); + unsigned Reg; + int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg); + FI.ChangeToImmediate(FrameOffset); + continue; + } + // Some instructions (e.g. 
inline asm instructions) can have // multiple frame indices and/or cause eliminateFrameIndex // to insert more than one instruction. We need the register @@ -837,7 +868,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, /// FIXME: Iterating over the instruction stream is unnecessary. We can simply /// iterate over the vreg use list, which at this point only contains machine /// operands for which eliminateFrameIndex need a new scratch reg. -void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { +void +PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { @@ -888,12 +920,16 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); + MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + + // Make sure MRI now accounts this register as used. + MRI.setPhysRegUsed(ScratchReg); // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. - RS->setUsed(ScratchReg); + RS->setRegUsed(ScratchReg); } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 5a6d39a04a22..f88b8ef2b660 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -16,8 +16,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PEI_H -#define LLVM_CODEGEN_PEI_H +#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H +#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SparseBitVector.h" diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 12b2c902f8d3..b1c341d3a681 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -107,13 +107,9 @@ bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ } bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { - // Negative frame indices are used for special things that don't - // appear in LLVM IR. Non-negative indices may be used for things - // like static allocas. if (!MFI) - return FI >= 0; - // Spill slots should not alias others. - return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI); + return true; + return MFI->isAliasedObjectIndex(FI); } bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 894aee7a9917..6b346f4536cf 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #ifndef NDEBUG #include "llvm/ADT/SparseBitVector.h" @@ -91,6 +90,7 @@ void RegAllocBase::allocatePhysRegs() { // Unused registers can appear when the spiller coalesces snippets. 
if (MRI->reg_nodbg_empty(VirtReg->reg)) { DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + aboutToRemoveInterval(*VirtReg); LIS->removeInterval(VirtReg->reg); continue; } @@ -102,7 +102,7 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); typedef SmallVector<unsigned, 4> VirtRegVec; VirtRegVec SplitVRegs; @@ -140,6 +140,7 @@ void RegAllocBase::allocatePhysRegs() { assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + aboutToRemoveInterval(*SplitVirtReg); LIS->removeInterval(SplitVirtReg->reg); continue; } diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index b333c36bff92..659b8f505a22 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -34,8 +34,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_REGALLOCBASE -#define LLVM_CODEGEN_REGALLOCBASE +#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H +#define LLVM_LIB_CODEGEN_REGALLOCBASE_H #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -96,6 +96,9 @@ protected: // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; + /// Method called when the allocator is about to remove a LiveInterval. + virtual void aboutToRemoveInterval(LiveInterval &LI) {} + public: /// VerifyEnabled - True when -verify-regalloc is given. static bool VerifyEnabled; @@ -106,4 +109,4 @@ private: } // end namespace llvm -#endif // !defined(LLVM_CODEGEN_REGALLOCBASE) +#endif diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6bc678e85211..0090332a8123 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -33,7 +33,6 @@ #include "llvm/PassAnalysisSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cstdlib> #include <queue> diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 97b9f7650892..c62141400267 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -33,7 +33,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -53,7 +53,6 @@ namespace { RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), isBulkSpilling(false) {} private: - const TargetMachine *TM; MachineFunction *MF; MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; @@ -298,7 +297,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + const MDNode *Var = DBG->getDebugVariable(); + const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? 
DBG->getOperand(1).getImm() : 0; DebugLoc DL; @@ -308,10 +308,13 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, DL = (--EI)->getDebugLoc(); } else DL = MI->getDebugLoc(); - MachineBasicBlock *MBB = DBG->getParent(); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(FI) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } @@ -369,15 +372,23 @@ void RAFast::usePhysReg(MachineOperand &MO) { case regDisabled: break; case regReserved: - assert(TRI->isSuperRegister(PhysReg, Alias) && + // Either PhysReg is a subregister of Alias and we mark the + // whole register as free, or PhysReg is the superregister of + // Alias and we mark all the aliases as disabled before freeing + // PhysReg. + // In the latter case, since PhysReg was disabled, this means that + // its value is defined only by physical sub-registers. This check + // is performed by the assert of the default case in this loop. + // Note: The value of the superregister may only be partial + // defined, that is why regDisabled is a valid state for aliases. + assert((TRI->isSuperRegister(PhysReg, Alias) || + TRI->isSuperRegister(Alias, PhysReg)) && "Instruction is not using a subregister of a reserved register"); - // Leave the superregister in the working set. - PhysRegState[Alias] = regFree; - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; + // Fall through. case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. + PhysRegState[Alias] = regFree; MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -545,7 +556,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " - << RC->getName() << "\n"); + << TRI->getRegClassName(RC) << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ @@ -705,7 +716,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, continue; if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg)) + if (ThroughRegs.insert(Reg).second) DEBUG(dbgs() << ' ' << PrintReg(Reg)); } } @@ -862,13 +873,16 @@ void RAFast::AllocateBasicBlock() { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *MDPtr = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(SS) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *NewDV); // Scan NewDV operands from the beginning. @@ -1017,8 +1031,7 @@ void RAFast::AllocateBasicBlock() { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? 
- regFree : regReserved); + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); continue; } LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); @@ -1070,9 +1083,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); - TM = &Fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); @@ -1093,9 +1105,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { } // Add the clobber lists for all the instructions we skipped earlier. - for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator - I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const uint16_t *Defs = (*I)->getImplicitDefs()) + for (const MCInstrDesc *Desc : SkippedInstrs) + if (const uint16_t *Defs = Desc->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index dee990c2eb5f..edc329499c42 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -296,6 +296,9 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + public: RAGreedy(); @@ -311,6 +314,7 @@ public: void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; + void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -378,6 +382,24 @@ private: SmallVirtRegSet &, unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. + unsigned Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + typedef SmallVector<HintInfo, 4> HintsInfo; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); + void collectHintInfo(unsigned, HintsInfo &); }; } // end anonymous namespace @@ -453,7 +475,9 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (VRM->hasPhys(VirtReg)) { - Matrix->unassign(LIS->getInterval(VirtReg)); + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); return true; } // Unassigned virtreg is probably in the priority queue. @@ -514,7 +538,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. 
bool ReverseLocal = TRI->reverseLocalAssignment(); - bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && + bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && @@ -817,7 +841,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { - DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); return 0; } @@ -967,14 +991,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, BCS[B].Exit = SpillPlacement::PrefSpill; if (++B == GroupSize) { - ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); B = 0; } } - ArrayRef<SpillPlacement::BlockConstraint> Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); } @@ -1013,7 +1035,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. - ArrayRef<unsigned> NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); if (Cand.PhysReg) addThroughConstraints(Cand.Intf, NewBlocks); else @@ -1791,9 +1813,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // instructions. // // Try to guess the size of the new interval. - const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), - Uses[SplitBefore].distance(Uses[SplitAfter]) + - (LiveBefore + LiveAfter)*SlotIndex::InstrDist); + const float EstWeight = normalizeSpillWeight( + blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter) * SlotIndex::InstrDist, + 1); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. DEBUG(dbgs() << " w=" << EstWeight); @@ -2213,6 +2237,11 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, return PhysReg; } +void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { + // Do not keep invalid information around. + SetOfBrokenHints.remove(&LI); +} + void RAGreedy::initializeCSRCost() { // We use the larger one out of the command-line option and the value report // by TRI. @@ -2238,6 +2267,170 @@ void RAGreedy::initializeCSRCost() { CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); } +/// \brief Collect the hint info for \p Reg. +/// The results are stored into \p Out. +/// \p Out is not cleared before being populated. +void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!Instr.isFullCopy()) + continue; + // Look for the other end of the copy. + unsigned OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) + continue; + } + // Get the current assignment. + unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + ? OtherReg + : VRM->getPhys(OtherReg); + // Push the collected information. 
+    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
+                           OtherPhysReg));
+  }
+}
+
+/// \brief Using the given \p List, compute the cost of the broken hints if
+/// \p PhysReg was used.
+/// \return The cost of \p List for \p PhysReg.
+BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
+                                           unsigned PhysReg) {
+  BlockFrequency Cost = 0;
+  for (const HintInfo &Info : List) {
+    if (Info.PhysReg != PhysReg)
+      Cost += Info.Freq;
+  }
+  return Cost;
+}
+
+/// \brief Using the register assigned to \p VirtReg, try to recolor
+/// all the live ranges that are copy-related with \p VirtReg.
+/// The recoloring is then propagated to all the live-ranges that have
+/// been recolored and so on, until no more copies can be coalesced or
+/// it is not profitable.
+/// For a given live range, profitability is determined by the sum of the
+/// frequencies of the non-identity copies it would introduce with the old
+/// and new register.
+void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
+  // We have a broken hint, check if it is possible to fix it by
+  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
+  // some register and PhysReg may be available for the other live-ranges.
+  SmallSet<unsigned, 4> Visited;
+  SmallVector<unsigned, 2> RecoloringCandidates;
+  HintsInfo Info;
+  unsigned Reg = VirtReg.reg;
+  unsigned PhysReg = VRM->getPhys(Reg);
+  // Start the recoloring algorithm from the input live-interval, then
+  // it will propagate to the ones that are copy-related with it.
+  Visited.insert(Reg);
+  RecoloringCandidates.push_back(Reg);
+
+  DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
+               << PrintReg(PhysReg, TRI) << ")\n");
+
+  do {
+    Reg = RecoloringCandidates.pop_back_val();
+
+    // We cannot recolor physical registers.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+
+    assert(VRM->hasPhys(Reg) && "We have unallocated variable!!");
+
+    // Get the live interval mapped with this virtual register to be able
+    // to check for the interference with the new color.
+    LiveInterval &LI = LIS->getInterval(Reg);
+    unsigned CurrPhys = VRM->getPhys(Reg);
+    // Check that the new color matches the register class constraints and
+    // that it is free for this live range.
+    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
+                                Matrix->checkInterference(LI, PhysReg)))
+      continue;
+
+    DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
+                 << ") is recolorable.\n");
+
+    // Gather the hint info.
+    Info.clear();
+    collectHintInfo(Reg, Info);
+    // Check if recoloring the live-range will increase the cost of the
+    // non-identity copies.
+    if (CurrPhys != PhysReg) {
+      DEBUG(dbgs() << "Checking profitability:\n");
+      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
+      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
+      DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
+                   << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
+      if (OldCopiesCost < NewCopiesCost) {
+        DEBUG(dbgs() << "=> Not profitable.\n");
+        continue;
+      }
+      // At this point, the cost is either cheaper or equal. If it is
+      // equal, we consider it profitable because it may expose
+      // more recoloring opportunities.
+      DEBUG(dbgs() << "=> Profitable.\n");
+      // Recolor the live-range.
+      Matrix->unassign(LI);
+      Matrix->assign(LI, PhysReg);
+    }
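The profitability test above boils down to comparing two frequency sums: the copies that remain broken under the current color versus under the candidate color, with ties deliberately taken. In sketch form, standalone and with simplified types (HintSketch, brokenHintCost, and worthRecoloring are hypothetical names):

    #include <vector>

    struct HintSketch { double Freq; unsigned PhysReg; };

    // Frequency-weighted cost of the hints PhysReg fails to satisfy;
    // mirrors RAGreedy::getBrokenHintFreq above.
    double brokenHintCost(const std::vector<HintSketch> &Hints,
                          unsigned PhysReg) {
      double Cost = 0;
      for (const HintSketch &H : Hints)
        if (H.PhysReg != PhysReg)
          Cost += H.Freq;
      return Cost;
    }

    // Recolor only when the candidate color is not more expensive;
    // ties are taken because they may expose further opportunities.
    bool worthRecoloring(const std::vector<HintSketch> &Hints,
                         unsigned CurrPhys, unsigned CandidatePhys) {
      return brokenHintCost(Hints, CandidatePhys) <=
             brokenHintCost(Hints, CurrPhys);
    }

+    // Push all copy-related live-ranges to keep reconciling the broken
+    // hints.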
+    for (const HintInfo &HI : Info) {
+      if (Visited.insert(HI.Reg).second)
+        RecoloringCandidates.push_back(HI.Reg);
+    }
+  } while (!RecoloringCandidates.empty());
+}
+
+/// \brief Try to recolor broken hints.
+/// Broken hints may be repaired by recoloring when an evicted variable
+/// freed up a register for a larger live-range.
+/// Consider the following example:
+/// BB1:
+///   a =
+///   b =
+/// BB2:
+///   ...
+///   = b
+///   = a
+/// Let us assume b gets split:
+/// BB1:
+///   a =
+///   b =
+/// BB2:
+///   c = b
+///   ...
+///   d = c
+///   = d
+///   = a
+/// Because of how the allocation works, b, c, and d may be assigned different
+/// colors. Now, if a gets evicted later:
+/// BB1:
+///   a =
+///   st a, SpillSlot
+///   b =
+/// BB2:
+///   c = b
+///   ...
+///   d = c
+///   = d
+///   e = ld SpillSlot
+///   = e
+/// It is likely that we can assign the same register to b, c, and d,
+/// getting rid of 2 copies.
+void RAGreedy::tryHintsRecoloring() {
+  for (LiveInterval *LI : SetOfBrokenHints) {
+    assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+           "Recoloring is possible only for virtual registers");
+    // Some dead defs may be around (e.g., because of debug uses).
+    // Ignore those.
+    if (!VRM->hasPhys(LI->reg))
+      continue;
+    tryHintRecoloring(*LI);
+  }
+}
+
 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                      SmallVectorImpl<unsigned> &NewVRegs,
                                      SmallVirtRegSet &FixedRegisters,
@@ -2274,8 +2467,18 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
   // queue. The RS_Split ranges already failed to do this, and they should not
   // get a second chance until they have been split.
   if (Stage != RS_Split)
-    if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit))
+    if (unsigned PhysReg =
+            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+      unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
+      // If VirtReg has a hint and that hint is broken, record this
+      // virtual register as a recoloring candidate for this broken hint.
+      // Indeed, since we evicted a variable in its neighborhood it is
+      // likely we can at least partially recolor some of the
+      // copy-related live-ranges.
+      if (Hint && Hint != PhysReg)
+        SetOfBrokenHints.insert(&VirtReg);
       return PhysReg;
+    }
 
   assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
 
@@ -2319,13 +2522,13 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
                << "********** Function: " << mf.getName() << '\n');
 
   MF = &mf;
-  const TargetMachine &TM = MF->getTarget();
-  TRI = TM.getRegisterInfo();
-  TII = TM.getInstrInfo();
+  TRI = MF->getSubtarget().getRegisterInfo();
+  TII = MF->getSubtarget().getInstrInfo();
   RCI.runOnMachineFunction(mf);
 
   EnableLocalReassign = EnableLocalReassignment ||
-    TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+                        MF->getSubtarget().enableRALocalReassignment(
+                            MF->getTarget().getOptLevel());
 
   if (VerifyEnabled)
     MF->verify(this, "Before greedy register allocator");
@@ -2355,8 +2558,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
   NextCascade = 1;
   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
   GlobalCand.resize(32); // This will grow as needed.
+ SetOfBrokenHints.clear(); allocatePhysRegs(); + tryHintsRecoloring(); releaseMemory(); return true; } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 8a3b53fd08e5..eb7e563352ac 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -49,9 +49,10 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <limits> #include <memory> +#include <queue> #include <set> #include <sstream> #include <vector> @@ -61,17 +62,17 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", +RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator); static cl::opt<bool> -pbqpCoalescing("pbqp-coalescing", +PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); #ifndef NDEBUG static cl::opt<bool> -pbqpDumpGraphs("pbqp-dump-graphs", +PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden); #endif @@ -88,8 +89,8 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(std::unique_ptr<PBQPBuilder> b, char *cPassID = nullptr) - : MachineFunctionPass(ID), builder(std::move(b)), customPassID(cPassID) { + RegAllocPBQP(char *cPassID = nullptr) + : MachineFunctionPass(ID), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); @@ -117,301 +118,320 @@ private: typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap; typedef std::set<unsigned> RegSet; - std::unique_ptr<PBQPBuilder> builder; - char *customPassID; - MachineFunction *mf; - const TargetMachine *tm; - const TargetRegisterInfo *tri; - const TargetInstrInfo *tii; - MachineRegisterInfo *mri; - const MachineBlockFrequencyInfo *mbfi; - - std::unique_ptr<Spiller> spiller; - LiveIntervals *lis; - LiveStacks *lss; - VirtRegMap *vrm; - - RegSet vregsToAlloc, emptyIntervalVRegs; + RegSet VRegsToAlloc, EmptyIntervalVRegs; /// \brief Finds the initial set of vreg intervals to allocate. - void findVRegIntervalsToAlloc(); + void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); + + /// \brief Constructs an initial graph. + void initializeGraph(PBQPRAGraph &G); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. - bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, - const PBQP::Solution &solution); + bool mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Postprocessing before final spilling. Sets basic block "live in" /// variables. - void finalizeAlloc() const; + void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM) const; }; char RegAllocPBQP::ID = 0; -} // End anonymous namespace. 
-
-unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const {
-  Node2VReg::const_iterator vregItr = node2VReg.find(node);
-  assert(vregItr != node2VReg.end() && "No vreg for node.");
-  return vregItr->second;
-}
-
-PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
-  VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
-  assert(nodeItr != vreg2Node.end() && "No node for vreg.");
-  return nodeItr->second;
-
-}
+/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+  void apply(PBQPRAGraph &G) override {
+    LiveIntervals &LIS = G.getMetadata().LIS;
+
+    // A minimum spill cost, so that register constraints can be set
+    // without normalization in the [0.0, MinSpillCost) interval.
+    const PBQP::PBQPNum MinSpillCost = 10.0;
+
+    for (auto NId : G.nodeIds()) {
+      PBQP::PBQPNum SpillCost =
+        LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+      if (SpillCost == 0.0)
+        SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+      else
+        SpillCost += MinSpillCost;
+      PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+      NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+      G.setNodeCosts(NId, std::move(NodeCosts));
+    }
+  }
+};
 
-const PBQPRAProblem::AllowedSet&
-  PBQPRAProblem::getAllowedSet(unsigned vreg) const {
-  AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
-  assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
-  const AllowedSet &allowedSet = allowedSetItr->second;
-  return allowedSet;
-}
+/// @brief Add interference edges between overlapping vregs.
+class Interference : public PBQPRAConstraint {
+private:
 
-unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
-  assert(isPRegOption(vreg, option) && "Not a preg option.");
-
-  const AllowedSet& allowedSet = getAllowedSet(vreg);
-  assert(option <= allowedSet.size() && "Option outside allowed set.");
-  return allowedSet[option - 1];
-}
+  typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
+  typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
+  typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
 
-PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
-                                  const MachineBlockFrequencyInfo *mbfi,
-                                  const RegSet &vregs) {
+  // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
+  // for the fast interference graph construction algorithm. The last is there
+  // to save us from looking up node ids via the VRegToNode map in the graph
+  // metadata.
+  typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
+    IntervalInfo;
 
-  LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
-  MachineRegisterInfo *mri = &mf->getRegInfo();
-  const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+  static SlotIndex getStartPoint(const IntervalInfo &I) {
+    return std::get<0>(I)->segments[std::get<1>(I)].start;
+  }
 
-  std::unique_ptr<PBQPRAProblem> p(new PBQPRAProblem());
-  PBQPRAGraph &g = p->getGraph();
-  RegSet pregs;
+  static SlotIndex getEndPoint(const IntervalInfo &I) {
+    return std::get<0>(I)->segments[std::get<1>(I)].end;
+  }
 
-  // Collect the set of preg intervals, record that they're used in the MF.
-  for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) {
-    if (mri->def_empty(Reg))
-      continue;
-    pregs.insert(Reg);
-    mri->setPhysRegUsed(Reg);
+  static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
+    return std::get<2>(I);
   }
 
-  // Iterate over vregs.
-  for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
-       vregItr != vregEnd; ++vregItr) {
-    unsigned vreg = *vregItr;
-    const TargetRegisterClass *trc = mri->getRegClass(vreg);
-    LiveInterval *vregLI = &LIS->getInterval(vreg);
+  static bool lowestStartPoint(const IntervalInfo &I1,
+                               const IntervalInfo &I2) {
+    // Condition reversed because priority queue has the *highest* element at
+    // the front, rather than the lowest.
+    return getStartPoint(I1) > getStartPoint(I2);
+  }
 
-    // Record any overlaps with regmask operands.
-    BitVector regMaskOverlaps;
-    LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps);
+  static bool lowestEndPoint(const IntervalInfo &I1,
+                             const IntervalInfo &I2) {
+    SlotIndex E1 = getEndPoint(I1);
+    SlotIndex E2 = getEndPoint(I2);
 
-    // Compute an initial allowed set for the current vreg.
-    typedef std::vector<unsigned> VRAllowed;
-    VRAllowed vrAllowed;
-    ArrayRef<MCPhysReg> rawOrder = trc->getRawAllocationOrder(*mf);
-    for (unsigned i = 0; i != rawOrder.size(); ++i) {
-      unsigned preg = rawOrder[i];
-      if (mri->isReserved(preg))
-        continue;
+    if (E1 < E2)
+      return true;
 
-      // vregLI crosses a regmask operand that clobbers preg.
-      if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg))
-        continue;
+    if (E1 > E2)
+      return false;
 
-      // vregLI overlaps fixed regunit interference.
-      bool Interference = false;
-      for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) {
-        if (vregLI->overlaps(LIS->getRegUnit(*Units))) {
-          Interference = true;
-          break;
-        }
-      }
-      if (Interference)
-        continue;
-
-      // preg is usable for this virtual register.
-      vrAllowed.push_back(preg);
-    }
-
-    PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0);
+    // If two intervals end at the same point, we need a way to break the tie or
+    // the set will assume they're actually equal and refuse to insert a
+    // "duplicate". Just compare the vregs - fast and guaranteed unique.
+    return std::get<0>(I1)->reg < std::get<0>(I2)->reg;
+  }
 
-    PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
-        vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+  static bool isAtLastSegment(const IntervalInfo &I) {
+    return std::get<1>(I) == std::get<0>(I)->size() - 1;
+  }
 
-    addSpillCosts(nodeCosts, spillCost);
+  static IntervalInfo nextSegment(const IntervalInfo &I) {
+    return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
+  }
 
-    // Construct the node.
-    PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts));
+public:
 
-    // Record the mapping and allowed set in the problem.
-    p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end());
+  void apply(PBQPRAGraph &G) override {
+    // The following is loosely based on the linear scan algorithm introduced in
+    // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
+    // isn't linear, because the size of the active set isn't bound by the
+    // number of registers, but rather the size of the largest clique in the
+    // graph. Still, we expect this to be better than N^2.
+    LiveIntervals &LIS = G.getMetadata().LIS;
+
+    // Interference matrices are incredibly regular - they're only a function of
+    // the allowed sets, so we cache them to avoid the overhead of constructing
+    // and uniquing them.
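
A sketch of that caching idea with simplified stand-in types: because an edge's costs are a function of the two allowed-register vectors alone, keying a map on the (uniqued) vector addresses lets every edge with the same endpoint sets share one matrix:

    #include <map>
    #include <memory>
    #include <utility>
    #include <vector>

    using Matrix = std::vector<std::vector<float>>;
    using AllowedRegVector = std::vector<unsigned>;
    using MatrixKey =
        std::pair<const AllowedRegVector *, const AllowedRegVector *>;

    // Cache keyed on the addresses of the (assumed uniqued) allowed vectors.
    static std::map<MatrixKey, std::shared_ptr<Matrix>> MatrixCache;

    static std::shared_ptr<Matrix>
    getInterferenceMatrix(const AllowedRegVector &A, const AllowedRegVector &B) {
      MatrixKey K(&A, &B);
      auto It = MatrixCache.find(K);
      if (It != MatrixCache.end())
        return It->second; // reuse the previously built matrix
      auto M = std::make_shared<Matrix>(
          A.size() + 1, std::vector<float>(B.size() + 1, 0));
      // ... fill in infinities for overlapping register pairs here ...
      return MatrixCache[K] = M;
    }

    int main() {
      AllowedRegVector A{16, 17}, B{17, 18};
      auto M1 = getInterferenceMatrix(A, B);
      auto M2 = getInterferenceMatrix(A, B); // cache hit: same matrix object
      return M1 == M2 ? 0 : 1;
    }
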
+    IMatrixCache C;
+
+    typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
+    typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+                                decltype(&lowestStartPoint)> IntervalQueue;
+    IntervalSet Active(lowestEndPoint);
+    IntervalQueue Inactive(lowestStartPoint);
+
+    // Start by building the inactive set.
+    for (auto NId : G.nodeIds()) {
+      unsigned VReg = G.getNodeMetadata(NId).getVReg();
+      LiveInterval &LI = LIS.getInterval(VReg);
+      assert(!LI.empty() && "PBQP graph contains node for empty interval");
+      Inactive.push(std::make_tuple(&LI, 0, NId));
+    }
 
-  }
+    while (!Inactive.empty()) {
+      // Tentatively grab the "next" interval - this choice may be overridden
+      // below.
+      IntervalInfo Cur = Inactive.top();
+
+      // Retire any active intervals that end before Cur starts.
+      IntervalSet::iterator RetireItr = Active.begin();
+      while (RetireItr != Active.end() &&
+             (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
+        // If this interval has subsequent segments, add the next one to the
+        // inactive list.
+        if (!isAtLastSegment(*RetireItr))
+          Inactive.push(nextSegment(*RetireItr));
+
+        ++RetireItr;
+      }
+      Active.erase(Active.begin(), RetireItr);
+
+      // One of the newly retired segments may actually start before the
+      // Cur segment, so re-grab the front of the inactive list.
+      Cur = Inactive.top();
+      Inactive.pop();
+
+      // At this point we know that Cur overlaps all active intervals. Add the
+      // interference edges.
+      PBQP::GraphBase::NodeId NId = getNodeId(Cur);
+      for (const auto &A : Active) {
+        PBQP::GraphBase::NodeId MId = getNodeId(A);
+
+        // Check that we haven't already added this edge
+        // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
+        // It might be better to replace this with a local bit-matrix.
+        if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+          continue;
 
-  for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
-       vr1Itr != vrEnd; ++vr1Itr) {
-    unsigned vr1 = *vr1Itr;
-    const LiveInterval &l1 = lis->getInterval(vr1);
-    const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
-
-    for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd;
-         ++vr2Itr) {
-      unsigned vr2 = *vr2Itr;
-      const LiveInterval &l2 = lis->getInterval(vr2);
-      const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
-
-      assert(!l2.empty() && "Empty interval in vreg set?");
-      if (l1.overlaps(l2)) {
-        PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0);
-        addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri);
-
-        g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
-                  std::move(edgeCosts));
+        // This is a new edge - add it to the graph.
+        createInterferenceEdge(G, NId, MId, C);
       }
+
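The loop above is a segment sweep: take the next segment from a min-start priority queue, retire every active segment that ends before it starts, then add interference edges to the survivors. A compact standalone sketch with a hypothetical Interval type (the multi-segment re-push and the tie-breaking comparator are omitted for brevity):

    #include <algorithm>
    #include <cstdio>
    #include <queue>
    #include <vector>

    struct Interval { int Start, End, Id; }; // half-open [Start, End)

    int main() {
      std::vector<Interval> Ivs = {{0, 4, 0}, {1, 8, 1}, {5, 9, 2}, {6, 7, 3}};
      auto ByStart = [](const Interval &A, const Interval &B) {
        return A.Start > B.Start; // reversed: min-heap on start point
      };
      std::priority_queue<Interval, std::vector<Interval>, decltype(ByStart)>
          Inactive(ByStart, Ivs);
      std::vector<Interval> Active;

      while (!Inactive.empty()) {
        Interval Cur = Inactive.top();
        Inactive.pop();
        // Retire active intervals that end before Cur starts.
        Active.erase(std::remove_if(Active.begin(), Active.end(),
                                    [&](const Interval &A) {
                                      return A.End <= Cur.Start;
                                    }),
                     Active.end());
        // Cur overlaps everything still active: emit interference edges.
        for (const Interval &A : Active)
          std::printf("edge %d -- %d\n", A.Id, Cur.Id);
        Active.push_back(Cur);
      }
    }
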
+      // Finally, add Cur to the Active set.
+      Active.insert(Cur);
     }
   }
 
-  return p.release();
-}
+private:
 
-void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
-                                PBQP::PBQPNum spillCost) {
-  costVec[0] = spillCost;
-}
+  void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+                              PBQPRAGraph::NodeId MId, IMatrixCache &C) {
 
-void PBQPBuilder::addInterferenceCosts(
-    PBQP::Matrix &costMat,
-    const PBQPRAProblem::AllowedSet &vr1Allowed,
-    const PBQPRAProblem::AllowedSet &vr2Allowed,
-    const TargetRegisterInfo *tri) {
-  assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
-  assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+    const TargetRegisterInfo &TRI =
+      *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
 
-  for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
-    unsigned preg1 = vr1Allowed[i];
+    const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
+    const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
 
-    for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
-      unsigned preg2 = vr2Allowed[j];
+    // Try looking the edge costs up in the IMatrixCache first.
+    IMatrixKey K(&NRegs, &MRegs);
+    IMatrixCache::iterator I = C.find(K);
+    if (I != C.end()) {
+      G.addEdgeBypassingCostAllocator(NId, MId, I->second);
+      return;
+    }
 
-      if (tri->regsOverlap(preg1, preg2)) {
-        costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+    PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+    for (unsigned I = 0; I != NRegs.size(); ++I) {
+      unsigned PRegN = NRegs[I];
+      for (unsigned J = 0; J != MRegs.size(); ++J) {
+        unsigned PRegM = MRegs[J];
+        if (TRI.regsOverlap(PRegN, PRegM))
+          M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
       }
     }
+
+    PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
+    C[K] = G.getEdgeCostsPtr(EId);
   }
-}
+};
 
-PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
-                                            const LiveIntervals *lis,
-                                            const MachineBlockFrequencyInfo *mbfi,
-                                            const RegSet &vregs) {
-  std::unique_ptr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, mbfi, vregs));
-  PBQPRAGraph &g = p->getGraph();
+class Coalescing : public PBQPRAConstraint {
+public:
+  void apply(PBQPRAGraph &G) override {
+    MachineFunction &MF = G.getMetadata().MF;
+    MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
+    CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo());
+
+    // Scan the machine function and add a coalescing cost whenever CoalescerPair
+    // gives the OK.
+    for (const auto &MBB : MF) {
+      for (const auto &MI : MBB) {
+
+        // Skip non-coalescable or already coalesced copies.
+        if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
+          continue;
 
-  const TargetMachine &tm = mf->getTarget();
-  CoalescerPair cp(*tm.getRegisterInfo());
+        unsigned DstReg = CP.getDstReg();
+        unsigned SrcReg = CP.getSrcReg();
 
-  // Scan the machine function and add a coalescing cost whenever CoalescerPair
-  // gives the Ok.
-  for (const auto &mbb : *mf) {
-    for (const auto &mi : mbb) {
-      if (!cp.setRegisters(&mi)) {
-        continue; // Not coalescable.
-      }
+        const float Scale = 1.0f / MBFI.getEntryFreq();
+        PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale;
 
-      if (cp.getSrcReg() == cp.getDstReg()) {
-        continue; // Already coalesced.
-      }
+        if (CP.isPhys()) {
+          if (!MF.getRegInfo().isAllocatable(DstReg))
+            continue;
 
-      unsigned dst = cp.getDstReg(),
-               src = cp.getSrcReg();
+          PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
 
-      const float copyFactor = 0.5; // Cost of copy relative to load.
Current - // value plucked randomly out of the air. + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = + G.getNodeMetadata(NId).getAllowedRegs(); - PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi); + unsigned PRegOpt = 0; + while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) + ++PRegOpt; - if (cp.isPhys()) { - if (!mf->getRegInfo().isAllocatable(dst)) { - continue; - } - - const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; - while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { - ++pregOpt; - } - if (pregOpt < allowed.size()) { - ++pregOpt; // +1 to account for spill option. - PBQPRAGraph::NodeId node = p->getNodeForVReg(src); - llvm::dbgs() << "Reading node costs for node " << node << "\n"; - llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n"; - PBQP::Vector newCosts(g.getNodeCosts(node)); - addPhysRegCoalesce(newCosts, pregOpt, cBenefit); - g.setNodeCosts(node, newCosts); - } - } else { - const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); - const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst); - PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src); - PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2); - if (edge == g.invalidEdgeId()) { - PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0); - addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); - g.addEdge(node1, node2, costs); + if (PRegOpt < Allowed.size()) { + PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); + NewCosts[PRegOpt + 1] -= CBenefit; + G.setNodeCosts(NId, std::move(NewCosts)); + } } else { - if (g.getEdgeNode1Id(edge) == node2) { - std::swap(node1, node2); - std::swap(allowed1, allowed2); + PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); + PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = + &G.getNodeMetadata(N1Id).getAllowedRegs(); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = + &G.getNodeMetadata(N2Id).getAllowedRegs(); + + PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); + if (EId == G.invalidEdgeId()) { + PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, + Allowed2->size() + 1, 0); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.addEdge(N1Id, N2Id, std::move(Costs)); + } else { + if (G.getEdgeNode1Id(EId) == N2Id) { + std::swap(N1Id, N2Id); + std::swap(Allowed1, Allowed2); + } + PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.setEdgeCosts(EId, std::move(Costs)); } - PBQP::Matrix costs(g.getEdgeCosts(edge)); - addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); - g.setEdgeCosts(edge, costs); } } } } - return p.release(); -} - -void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, - unsigned pregOption, - PBQP::PBQPNum benefit) { - costVec[pregOption] += -benefit; -} - -void PBQPBuilderWithCoalescing::addVirtRegCoalesce( - PBQP::Matrix &costMat, - const PBQPRAProblem::AllowedSet &vr1Allowed, - const PBQPRAProblem::AllowedSet &vr2Allowed, - PBQP::PBQPNum benefit) { - - assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); - assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); - - for (unsigned i = 0; i != vr1Allowed.size(); ++i) { - unsigned preg1 = vr1Allowed[i]; - for (unsigned j = 0; j != vr2Allowed.size(); ++j) { - 
unsigned preg2 = vr2Allowed[j]; +private: - if (preg1 == preg2) { - costMat[i + 1][j + 1] += -benefit; + void addVirtRegCoalesce( + PBQPRAGraph::RawMatrix &CostMat, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, + PBQP::PBQPNum Benefit) { + assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); + assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); + for (unsigned I = 0; I != Allowed1.size(); ++I) { + unsigned PReg1 = Allowed1[I]; + for (unsigned J = 0; J != Allowed2.size(); ++J) { + unsigned PReg2 = Allowed2[J]; + if (PReg1 == PReg2) + CostMat[I + 1][J + 1] -= Benefit; } } } -} +}; + +} // End anonymous namespace. + +// Out-of-line destructor/anchor for PBQPRAConstraint. +PBQPRAConstraint::~PBQPRAConstraint() {} +void PBQPRAConstraint::anchor() {} +void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); @@ -437,118 +457,197 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { MachineFunctionPass::getAnalysisUsage(au); } -void RegAllocPBQP::findVRegIntervalsToAlloc() { +void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, + LiveIntervals &LIS) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); // Iterate over all live ranges. - for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (mri->reg_nodbg_empty(Reg)) + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) continue; - LiveInterval *li = &lis->getInterval(Reg); + LiveInterval &LI = LIS.getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. // Empty intervals we allocate in a simple post-processing stage in // finalizeAlloc. - if (!li->empty()) { - vregsToAlloc.insert(li->reg); + if (!LI.empty()) { + VRegsToAlloc.insert(LI.reg); } else { - emptyIntervalVRegs.insert(li->reg); + EmptyIntervalVRegs.insert(LI.reg); + } + } +} + +static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, + const MachineFunction &MF) { + const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSR[i] != 0; ++i) + if (TRI.regsOverlap(reg, CSR[i])) + return true; + return false; +} + +void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) { + MachineFunction &MF = G.getMetadata().MF; + + LiveIntervals &LIS = G.getMetadata().LIS; + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + + for (auto VReg : VRegsToAlloc) { + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + LiveInterval &VRegLI = LIS.getInterval(VReg); + + // Record any overlaps with regmask operands. + BitVector RegMaskOverlaps; + LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); + + // Compute an initial allowed set for the current vreg. + std::vector<unsigned> VRegAllowed; + ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); + for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { + unsigned PReg = RawPRegOrder[I]; + if (MRI.isReserved(PReg)) + continue; + + // vregLI crosses a regmask operand that clobbers preg. + if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) + continue; + + // vregLI overlaps fixed regunit interference. 
+      bool Interference = false;
+      for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
+        if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+          Interference = true;
+          break;
+        }
+      }
+      if (Interference)
+        continue;
+
+      // preg is usable for this virtual register.
+      VRegAllowed.push_back(PReg);
     }
+
+    PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
+
+    // Tweak cost of callee saved registers, as using them forces spilling and
+    // restoring them. This would only happen in the prologue / epilogue though.
+    for (unsigned i = 0; i != VRegAllowed.size(); ++i)
+      if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
+        NodeCosts[1 + i] += 1.0;
+
+    PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
+    G.getNodeMetadata(NId).setVReg(VReg);
+    G.getNodeMetadata(NId).setAllowedRegs(
+      G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
+    G.getMetadata().setNodeIdForVReg(VReg, NId);
   }
 }
 
-bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
-                                     const PBQP::Solution &solution) {
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
+                                     const PBQP::Solution &Solution,
+                                     VirtRegMap &VRM,
+                                     Spiller &VRegSpiller) {
+  MachineFunction &MF = G.getMetadata().MF;
+  LiveIntervals &LIS = G.getMetadata().LIS;
+  const TargetRegisterInfo &TRI =
+    *MF.getTarget().getSubtargetImpl()->getRegisterInfo();
+  (void)TRI;
+
   // Set to true if we have any spills
-  bool anotherRoundNeeded = false;
+  bool AnotherRoundNeeded = false;
 
   // Clear the existing allocation.
-  vrm->clearAllVirt();
+  VRM.clearAllVirt();
 
-  const PBQPRAGraph &g = problem.getGraph();
   // Iterate over the nodes mapping the PBQP solution to a register
   // assignment.
-  for (auto NId : g.nodeIds()) {
-    unsigned vreg = problem.getVRegForNode(NId);
-    unsigned alloc = solution.getSelection(NId);
-
-    if (problem.isPRegOption(vreg, alloc)) {
-      unsigned preg = problem.getPRegForOption(vreg, alloc);
-      DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> "
-            << tri->getName(preg) << "\n");
-      assert(preg != 0 && "Invalid preg selected.");
-      vrm->assignVirt2Phys(vreg, preg);
-    } else if (problem.isSpillOption(vreg, alloc)) {
-      vregsToAlloc.erase(vreg);
-      SmallVector<unsigned, 8> newSpills;
-      LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
-      spiller->spill(LRE);
-
-      DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: "
+  for (auto NId : G.nodeIds()) {
+    unsigned VReg = G.getNodeMetadata(NId).getVReg();
+    unsigned AllocOption = Solution.getSelection(NId);
+
+    if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
+      unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
+      DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
+            << TRI.getName(PReg) << "\n");
+      assert(PReg != 0 && "Invalid preg selected.");
+      VRM.assignVirt2Phys(VReg, PReg);
+    } else {
+      VRegsToAlloc.erase(VReg);
+      SmallVector<unsigned, 8> NewSpills;
+      LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM);
+      VRegSpiller.spill(LRE);
+
+      DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: "
                    << LRE.getParent().weight << ", New vregs: ");
 
       // Copy any newly inserted live intervals into the list of regs to
       // allocate.
- for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); - itr != end; ++itr) { - LiveInterval &li = lis->getInterval(*itr); - assert(!li.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(li.reg, tri) << " "); - vregsToAlloc.insert(li.reg); + for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); } DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !LRE.empty(); - } else { - llvm_unreachable("Unknown allocation option."); + AnotherRoundNeeded |= !LRE.empty(); } } - return !anotherRoundNeeded; + return !AnotherRoundNeeded; } +void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, + LiveIntervals &LIS, + VirtRegMap &VRM) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); -void RegAllocPBQP::finalizeAlloc() const { // First allocate registers for the empty intervals. for (RegSet::const_iterator - itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); - itr != end; ++itr) { - LiveInterval *li = &lis->getInterval(*itr); + I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); - unsigned physReg = mri->getSimpleHint(li->reg); + unsigned PReg = MRI.getSimpleHint(LI.reg); - if (physReg == 0) { - const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = liRC->getRawAllocationOrder(*mf).front(); + if (PReg == 0) { + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + PReg = RC.getRawAllocationOrder(MF).front(); } - vrm->assignVirt2Phys(li->reg, physReg); + VRM.assignVirt2Phys(LI.reg, PReg); } } -bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { +static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + // All intervals have a spill weight that is mostly proportional to the number + // of uses, with uses in loops having a bigger weight. + return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); +} - mf = &MF; - tm = &mf->getTarget(); - tri = tm->getRegisterInfo(); - tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + MachineBlockFrequencyInfo &MBFI = + getAnalysis<MachineBlockFrequencyInfo>(); - lis = &getAnalysis<LiveIntervals>(); - lss = &getAnalysis<LiveStacks>(); - mbfi = &getAnalysis<MachineBlockFrequencyInfo>(); + calculateSpillWeightsAndHints(LIS, MF, getAnalysis<MachineLoopInfo>(), MBFI, + normalizePBQPSpillWeight); - calculateSpillWeightsAndHints(*lis, MF, getAnalysis<MachineLoopInfo>(), - *mbfi); + VirtRegMap &VRM = getAnalysis<VirtRegMap>(); - vrm = &getAnalysis<VirtRegMap>(); - spiller.reset(createInlineSpiller(*this, MF, *vrm)); + std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); - mri->freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); // Allocator main loop: // @@ -560,72 +659,72 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // This process is continued till no more spills are generated. // Find the vreg intervals in need of allocation. 
- findVRegIntervalsToAlloc(); + findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG - const Function* func = mf->getFunction(); - std::string fqn = - func->getParent()->getModuleIdentifier() + "." + - func->getName().str(); + const Function &F = *MF.getFunction(); + std::string FullyQualifiedName = + F.getParent()->getModuleIdentifier() + "." + F.getName().str(); #endif // If there are non-empty intervals allocate them using pbqp. - if (!vregsToAlloc.empty()) { + if (!VRegsToAlloc.empty()) { - bool pbqpAllocComplete = false; - unsigned round = 0; + const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); + std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = + llvm::make_unique<PBQPRAConstraintList>(); + ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); + ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); + if (PBQPCoalescing) + ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); + ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); - while (!pbqpAllocComplete) { - DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); + bool PBQPAllocComplete = false; + unsigned Round = 0; - std::unique_ptr<PBQPRAProblem> problem( - builder->build(mf, lis, mbfi, vregsToAlloc)); + while (!PBQPAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + + PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); + initializeGraph(G); + ConstraintsRoot->apply(G); #ifndef NDEBUG - if (pbqpDumpGraphs) { - std::ostringstream rs; - rs << round; - std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph"); - std::string tmp; - raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text); - DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" - << graphFileName << "\"\n"); - problem->getGraph().dump(os); + if (PBQPDumpGraphs) { + std::ostringstream RS; + RS << Round; + std::string GraphFileName = FullyQualifiedName + "." + RS.str() + + ".pbqpgraph"; + std::error_code EC; + raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" + << GraphFileName << "\"\n"); + G.dumpToStream(OS); } #endif - PBQP::Solution solution = - PBQP::RegAlloc::solve(problem->getGraph()); - - pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); - - ++round; + PBQP::Solution Solution = PBQP::RegAlloc::solve(G); + PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); + ++Round; } } // Finalise allocation, allocate empty ranges. 
- finalizeAlloc(); - vregsToAlloc.clear(); - emptyIntervalVRegs.clear(); + finalizeAlloc(MF, LIS, VRM); + VRegsToAlloc.clear(); + EmptyIntervalVRegs.clear(); - DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); return true; } -FunctionPass * -llvm::createPBQPRegisterAllocator(std::unique_ptr<PBQPBuilder> builder, - char *customPassID) { - return new RegAllocPBQP(std::move(builder), customPassID); +FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { + return new RegAllocPBQP(customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - std::unique_ptr<PBQPBuilder> Builder; - if (pbqpCoalescing) - Builder = llvm::make_unique<PBQPBuilderWithCoalescing>(); - else - Builder = llvm::make_unique<PBQPBuilder>(); - return createPBQPRegisterAllocator(std::move(Builder)); + return createPBQPRegisterAllocator(); } #undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 8b5445c31624..ab33672d41f3 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -38,8 +37,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { MF = &mf; // Allocate new array the first time we see a new target. - if (MF->getTarget().getRegisterInfo() != TRI) { - TRI = MF->getTarget().getRegisterInfo(); + if (MF->getSubtarget().getRegisterInfo() != TRI) { + TRI = MF->getSubtarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); unsigned NumPSets = TRI->getNumRegPressureSets(); PSetLimits.reset(new unsigned[NumPSets]); @@ -48,6 +47,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? + assert(TRI && "no register info set"); const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last @@ -77,6 +77,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { /// registers filtered out. Volatile registers come first followed by CSR /// aliases ordered according to the CSR order specified by the target. void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + assert(RC && "no register class given"); RCInfo &RCI = RegClass[RC->getID()]; // Raw register count, including all reserved regs. @@ -138,7 +139,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.LastCostChange = LastCostChange; DEBUG({ - dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; + dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); dbgs() << (RCI.ProperSubClass ? 
" ] (sub-class)\n" : " ]\n"); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index e04a3cf077ff..b8cae4a59ed5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -32,6 +32,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -86,6 +87,14 @@ namespace { AliasAnalysis *AA; RegisterClassInfo RegClassInfo; + /// A LaneMask to remember on which subregister live ranges we need to call + /// shrinkToUses() later. + unsigned ShrinkMask; + + /// True if the main range of the currently coalesced intervals should be + /// checked for smaller live intervals. + bool ShrinkMainRange; + /// \brief True if the coalescer should aggressively coalesce global copies /// in favor of keeping local copies. bool JoinGlobalCopies; @@ -94,11 +103,11 @@ namespace { /// blocks exclusively containing copies. bool JoinSplitEdges; - /// WorkList - Copy instructions yet to be coalesced. + /// Copy instructions yet to be coalesced. SmallVector<MachineInstr*, 8> WorkList; SmallVector<MachineInstr*, 8> LocalWorkList; - /// ErasedInstrs - Set of instruction pointers that have been erased, and + /// Set of instruction pointers that have been erased, and /// that may be present in WorkList. SmallPtrSet<MachineInstr*, 8> ErasedInstrs; @@ -114,21 +123,21 @@ namespace { /// LiveRangeEdit callback. void LRE_WillEraseInstruction(MachineInstr *MI) override; - /// coalesceLocals - coalesce the LocalWorkList. + /// Coalesce the LocalWorkList. void coalesceLocals(); - /// joinAllIntervals - join compatible live intervals + /// Join compatible live intervals void joinAllIntervals(); - /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// Coalesce copies in the specified MBB, putting /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return + /// Try to coalesce all copies in CurrList. Return /// true if any progress was made. bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); - /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns /// true if the copy was successfully coalesced away. If it is not /// currently possible to coalesce this interval, but it may be possible if @@ -136,7 +145,7 @@ namespace { /// 'Again'. bool joinCopy(MachineInstr *TheCopy, bool &Again); - /// joinIntervals - Attempt to join these two intervals. On failure, this + /// Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we /// can use this information below to update aliases. bool joinIntervals(CoalescerPair &CP); @@ -147,40 +156,53 @@ namespace { /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); - /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. + /// Subranges in @p LI which only partially interfere with the desired + /// LaneMask are split as necessary. @p LaneMask are the lanes that + /// @p ToMerge will occupy in the coalescer register. 
+    /// lanemasks already adjusted to the coalesced register.
+    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+                           unsigned LaneMask, CoalescerPair &CP);
+
+    /// Join the liveranges of two subregisters. Joins @p RRange into
+    /// @p LRange; @p RRange may be invalid afterwards.
+    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+                          unsigned LaneMask, const CoalescerPair &CP);
+
+    /// We found a non-trivially-coalescable copy. If
     /// the source value number is defined by a copy from the destination reg
     /// see if we can merge these two destination reg valno# into a single
     /// value number, eliminating a copy.
     bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
 
-    /// hasOtherReachingDefs - Return true if there are definitions of IntB
+    /// Return true if there are definitions of IntB
     /// other than BValNo val# that can reach uses of AValno val# of IntA.
     bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
                               VNInfo *AValNo, VNInfo *BValNo);
 
-    /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// We found a non-trivially-coalescable copy.
     /// If the source value number is defined by a commutable instruction and
     /// its other operand is coalesced to the copy dest register, see if we
     /// can transform the copy into a noop by commuting the definition.
     bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
 
-    /// reMaterializeTrivialDef - If the source of a copy is defined by a
+    /// If the source of a copy is defined by a
     /// trivial computation, replace the copy by rematerializing the definition.
     bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI,
                                  bool &IsDefCopy);
 
-    /// canJoinPhys - Return true if a physreg copy should be joined.
+    /// Return true if a physreg copy should be joined.
     bool canJoinPhys(const CoalescerPair &CP);
 
-    /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+    /// Replace all defs and uses of SrcReg to DstReg and
     /// update the subregister number if it is not zero. If DstReg is a
     /// physical register and the existing subregister number of the def / use
     /// being updated is not zero, make sure to set it to the correct physical
     /// subregister.
     void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
 
-    /// eliminateUndefCopy - Handle copies of undef values.
-    bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
+    /// Handle copies of undef values.
+    bool eliminateUndefCopy(MachineInstr *CopyMI);
 
   public:
     static char ID; // Class identification, replacement for typeinfo
@@ -192,10 +214,10 @@ namespace {
 
     void releaseMemory() override;
 
-    /// runOnMachineFunction - pass entry point
+    /// This is the pass entry point.
     bool runOnMachineFunction(MachineFunction&) override;
 
-    /// print - Implement the dump method.
+    /// Implement the dump method.
     void print(raw_ostream &O, const Module* = nullptr) const override;
   };
} /// end anonymous namespace
 
@@ -407,7 +429,7 @@ void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
   ErasedInstrs.insert(MI);
 }
 
-/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// We found a non-trivially-coalescable copy with IntA
 /// being the source and IntB being the dest, thus this defines a value number
 /// in IntB. If the source value number (in IntA) is defined by a copy from B,
 /// see if we can merge these two pieces of B into a single value number,
@@ -492,6 +514,16 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   // Okay, merge "B1" into the same value number as "B0".
   if (BValNo != ValS->valno)
     IntB.MergeValueNumberInto(BValNo, ValS->valno);
+
+  // Do the same for the subregister segments.
+  for (LiveInterval::SubRange &S : IntB.subranges()) {
+    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
+    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
+    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
+    if (SubBValNo != SubValSNo)
+      S.MergeValueNumberInto(SubBValNo, SubValSNo);
+  }
+
   DEBUG(dbgs() << "   result = " << IntB << '\n');
 
   // If the source instruction was killing the source register before the
@@ -512,7 +544,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
   return true;
 }
 
-/// hasOtherReachingDefs - Return true if there are definitions of IntB
+/// Return true if there are definitions of IntB
 /// other than BValNo val# that can reach uses of AValno val# of IntA.
 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
                                              LiveInterval &IntB,
@@ -523,26 +555,37 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
   if (LIS->hasPHIKill(IntA, AValNo))
     return true;
 
-  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
-       AI != AE; ++AI) {
-    if (AI->valno != AValNo) continue;
+  for (LiveRange::Segment &ASeg : IntA.segments) {
+    if (ASeg.valno != AValNo) continue;
     LiveInterval::iterator BI =
-      std::upper_bound(IntB.begin(), IntB.end(), AI->start);
+      std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
     if (BI != IntB.begin())
       --BI;
-    for (; BI != IntB.end() && AI->end >= BI->start; ++BI) {
+    for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
       if (BI->valno == BValNo)
         continue;
-      if (BI->start <= AI->start && BI->end > AI->start)
+      if (BI->start <= ASeg.start && BI->end > ASeg.start)
         return true;
-      if (BI->start > AI->start && BI->start < AI->end)
+      if (BI->start > ASeg.start && BI->start < ASeg.end)
        return true;
     }
   }
   return false;
 }
 
-/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// Copy segments with value number @p SrcValNo from liverange @p Src to live
+/// range @p Dst and use value number @p DstValNo there.
+static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
+                                 const LiveRange &Src, const VNInfo *SrcValNo)
+{
+  for (const LiveRange::Segment &S : Src.segments) {
+    if (S.valno != SrcValNo)
+      continue;
+    Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
+  }
+}
+
+/// We found a non-trivially-coalescable copy with
 /// IntA being the source and IntB being the dest, thus this defines a value
 /// number in IntB. If the source value number (in IntA) is defined by a
 /// commutable instruction and its other operand is coalesced to the copy dest
@@ -559,7 +602,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
 ///
 ///  B2 = op B0 A2<kill>
 ///  ...
-///  B1 = B2      <- now an identify copy
+///  B1 = B2      <- now an identity copy
 ///  ...
 ///     = op B2   <- more uses
 ///
@@ -567,25 +610,23 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
 ///
 bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                                  MachineInstr *CopyMI) {
-  assert (!CP.isPhys());
-
-  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
+  assert(!CP.isPhys());
 
   LiveInterval &IntA =
-    LIS->getInterval(CP.isFlipped() ?
CP.getDstReg() : CP.getSrcReg()); + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = - LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); - if (!BValNo || BValNo->def != CopyIdx) - return false; + assert(BValNo != nullptr && BValNo->def == CopyIdx); // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); - assert(AValNo && "COPY source not live"); - if (AValNo->isPHIDef() || AValNo->isUnused()) + assert(AValNo && !AValNo->isUnused() && "COPY source not live"); + if (AValNo->isPHIDef()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -652,8 +693,6 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MBB->insert(Pos, NewMI); MBB->erase(DefMI); } - unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); - NewMI->getOperand(OpIdx).setIsKill(); // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. // A = or A, B @@ -666,10 +705,13 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Update uses of IntA of the specific Val# with IntB. for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), - UE = MRI->use_end(); UI != UE;) { + UE = MRI->use_end(); + UI != UE; /* ++UI is below because of possible MI removal */) { MachineOperand &UseMO = *UI; - MachineInstr *UseMI = UseMO.getParent(); ++UI; + if (UseMO.isUndef()) + continue; + MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugValue()) { // FIXME These don't have an instruction index. Not clear we have enough // info to decide whether to do this replacement or not. For now do it. @@ -678,7 +720,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, } SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); - if (US == IntA.end() || US->valno != AValNo) + assert(US != IntA.end() && "Use must be live"); + if (US->valno != AValNo) continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); @@ -703,6 +746,16 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + for (LiveInterval::SubRange &S : IntB.subranges()) { + VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); + if (!SubDVNI) + continue; + VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); + assert(SubBValNo->def == CopyIdx); + VNInfo *Merged = S.MergeValueNumberInto(SubBValNo, SubDVNI); + Merged->def = CopyIdx; + } + ErasedInstrs.insert(UseMI); LIS->RemoveMachineInstrFromMaps(UseMI); UseMI->eraseFromParent(); @@ -710,22 +763,77 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. 
-  VNInfo *ValNo = BValNo;
-  ValNo->def = AValNo->def;
-  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
-       AI != AE; ++AI) {
-    if (AI->valno != AValNo) continue;
-    IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo));
+  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+  if (IntB.hasSubRanges()) {
+    if (!IntA.hasSubRanges()) {
+      unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
+      IntA.createSubRangeFrom(Allocator, Mask, IntA);
+    }
+    SlotIndex AIdx = CopyIdx.getRegSlot(true);
+    for (LiveInterval::SubRange &SA : IntA.subranges()) {
+      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
+      assert(ASubValNo != nullptr);
+
+      unsigned AMask = SA.LaneMask;
+      for (LiveInterval::SubRange &SB : IntB.subranges()) {
+        unsigned BMask = SB.LaneMask;
+        unsigned Common = BMask & AMask;
+        if (Common == 0)
+          continue;
+
+        DEBUG(
+            dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common));
+        unsigned BRest = BMask & ~AMask;
+        LiveInterval::SubRange *CommonRange;
+        if (BRest != 0) {
+          SB.LaneMask = BRest;
+          DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest));
+          // Duplicate SubRange for newly merged common stuff.
+          CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
+        } else {
+          // We can reuse the existing SubRange.
+          SB.LaneMask = Common;
+          CommonRange = &SB;
+        }
+        LiveRange RangeCopy(SB, Allocator);
+
+        VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
+        assert(BSubValNo->def == CopyIdx);
+        BSubValNo->def = ASubValNo->def;
+        addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
+        AMask &= ~BMask;
+      }
+      if (AMask != 0) {
+        DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask));
+        LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
+        VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
+        addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
+      }
+      SA.removeValNo(ASubValNo);
+    }
   }
+
+  BValNo->def = AValNo->def;
+  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
   DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
 
   IntA.removeValNo(AValNo);
+  // Remove value numbers in subranges (the case where both A and B have
+  // subranges has already been handled above).
+  if (!IntB.hasSubRanges()) {
+    SlotIndex AIdx = CopyIdx.getRegSlot(true);
+    for (LiveInterval::SubRange &SA : IntA.subranges()) {
+      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
+      assert(ASubValNo != nullptr);
+      SA.removeValNo(ASubValNo);
+    }
+  }
   DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
   ++numCommutes;
   return true;
 }
 
-/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// If the source of a copy is defined by a trivial
 /// computation, replace the copy by rematerializing the definition.
 bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
                                                 MachineInstr *CopyMI,
@@ -751,7 +859,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
     IsDefCopy = true;
     return false;
   }
-  if (!DefMI->isAsCheapAsAMove())
+  if (!TII->isAsCheapAsAMove(DefMI))
     return false;
   if (!TII->isTriviallyReMaterializable(DefMI, AA))
     return false;
@@ -898,57 +1006,110 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
 
   // The source interval can become smaller because we removed a use.
   LIS->shrinkToUses(&SrcInt, &DeadDefs);
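
The subrange merging in removeCopyByCommutingDef above is driven by plain bit arithmetic on lane masks; a tiny sketch of that bookkeeping with hypothetical mask values:

    #include <cstdio>

    int main() {
      unsigned AMask = 0x0003; // lanes the A subrange covers
      unsigned BMask = 0x0006; // lanes the B subrange covers

      unsigned Common = BMask & AMask; // lanes that need a merged subrange
      unsigned BRest = BMask & ~AMask; // lanes B keeps unchanged
      unsigned ANew = AMask & ~BMask;  // lanes of A matched by no B subrange

      std::printf("common %04X, B-only %04X, A-only %04X\n",
                  Common, BRest, ANew);
      // prints: common 0002, B-only 0004, A-only 0001
    }
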
-  if (!DeadDefs.empty())
+  if (!DeadDefs.empty()) {
+    // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
+    // to describe DstReg instead.
+    for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
+      MachineInstr *UseMI = UseMO.getParent();
+      if (UseMI->isDebugValue()) {
+        UseMO.setReg(DstReg);
+        DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
+      }
+    }
     eliminateDeadDefs();
+  }
 
   return true;
 }
 
-/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef>
+static void removeUndefValue(LiveRange &LR, SlotIndex At)
+{
+  VNInfo *VNInfo = LR.getVNInfoAt(At);
+  assert(VNInfo != nullptr && SlotIndex::isSameInstr(VNInfo->def, At));
+  LR.removeValNo(VNInfo);
+}
+
+/// ProcessImplicitDefs may leave some copies of <undef>
 /// values, it only removes local variables. When we have a copy like:
 ///
 ///   %vreg1 = COPY %vreg2<undef>
 ///
 /// We delete the copy and remove the corresponding value number from %vreg1.
 /// Any uses of that value number are marked as <undef>.
-bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
-                                           const CoalescerPair &CP) {
+bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
+  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
+  // CoalescerPair may have a new register class with adjusted subreg indices
+  // at this point.
+  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+
   SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
-  LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
-  if (SrcInt->liveAt(Idx))
-    return false;
-  LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
-  if (DstInt->liveAt(Idx))
+  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
+  // CopyMI is undef iff SrcReg is not live before the instruction.
+  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
+    unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
+    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
+      if ((SR.LaneMask & SrcMask) == 0)
+        continue;
+      if (SR.liveAt(Idx))
+        return false;
+    }
+  } else if (SrcLI.liveAt(Idx))
     return false;
 
-  // No intervals are live-in to CopyMI - it is undef.
-  if (CP.isFlipped())
-    DstInt = SrcInt;
-  SrcInt = nullptr;
+  DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
 
-  VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
-  assert(DeadVNI && "No value defined in DstInt");
-  DstInt->removeValNo(DeadVNI);
+  // Remove any DstReg segments starting at the instruction.
+  LiveInterval &DstLI = LIS->getInterval(DstReg);
+  unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
+  SlotIndex RegIndex = Idx.getRegSlot();
+  for (LiveInterval::SubRange &SR : DstLI.subranges()) {
+    if ((SR.LaneMask & DstMask) == 0)
+      continue;
+    removeUndefValue(SR, RegIndex);
+
+    DstLI.removeEmptySubRanges();
+  }
+  // Remove value or merge with previous one in case of a subregister def.
+  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
+    VNInfo *VNInfo = DstLI.getVNInfoAt(RegIndex);
+    DstLI.MergeValueNumberInto(VNInfo, PrevVNI);
+  } else {
+    removeUndefValue(DstLI, RegIndex);
+  }
 
-  // Find new undef uses.
-  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstInt->reg)) {
-    if (MO.isDef() || MO.isUndef())
+  // Mark uses as undef.
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { + if (MO.isDef() /*|| MO.isUndef()*/) continue; - MachineInstr *MI = MO.getParent(); - SlotIndex Idx = LIS->getInstructionIndex(MI); - if (DstInt->liveAt(Idx)) + const MachineInstr &MI = *MO.getParent(); + SlotIndex UseIdx = LIS->getInstructionIndex(&MI); + unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + bool isLive; + if (UseMask != ~0u && DstLI.hasSubRanges()) { + isLive = false; + for (const LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & UseMask) == 0) + continue; + if (SR.liveAt(UseIdx)) { + isLive = true; + break; + } + } + } else + isLive = DstLI.liveAt(UseIdx); + if (isLive) continue; MO.setIsUndef(true); - DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); } return true; } -/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and -/// update the subregister number if it is not zero. If DstReg is a -/// physical register and the existing subregister number of the def / use -/// being updated is not zero, make sure to set it to the correct physical -/// subregister. +/// Replace all defs and uses of SrcReg to DstReg and update the subregister +/// number if it is not zero. If DstReg is a physical register and the existing +/// subregister number of the def / use being updated is not zero, make sure to +/// set it to the correct physical subregister. void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { @@ -966,7 +1127,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // the UseMI operands removes them from the SrcReg use-def chain, but when // SrcReg is DstReg we could encounter UseMI twice if it has multiple // operands mentioning the virtual register. - if (SrcReg == DstReg && !Visited.insert(UseMI)) + if (SrcReg == DstReg && !Visited.insert(UseMI).second) continue; SmallVector<unsigned,8> Ops; @@ -988,6 +1149,40 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx && MO.isDef()) MO.setIsUndef(!Reads); + // A subreg use of a partially undef (super) register may be a complete + // undef use now and then has to be marked that way. + if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) { + if (!DstInt->hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); + } + unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + bool IsUndef = true; + SlotIndex MIIdx = UseMI->isDebugValue() + ? LIS->getSlotIndexes()->getIndexBefore(UseMI) + : LIS->getInstructionIndex(UseMI); + SlotIndex UseIdx = MIIdx.getRegSlot(true); + for (LiveInterval::SubRange &S : DstInt->subranges()) { + if ((S.LaneMask & Mask) == 0) + continue; + if (S.liveAt(UseIdx)) { + IsUndef = false; + break; + } + } + if (IsUndef) { + MO.setIsUndef(true); + // We found out some subregister use is actually reading an undefined + // value. In some cases the whole vreg has become undefined at this + // point so we have to potentially shrink the main range if the + // use was ending a live segment there. + LiveQueryResult Q = DstInt->Query(MIIdx); + if (Q.valueOut() == nullptr) + ShrinkMainRange = true; + } + } + if (DstIsPhys) MO.substPhysReg(DstReg, *TRI); else @@ -1003,7 +1198,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } } -/// canJoinPhys - Return true if a copy involving a physreg should be joined. 
+/// Return true if a copy involving a physreg should be joined. bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is @@ -1021,7 +1216,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { return false; } -/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently /// possible to coalesce this interval, but it may be possible if other @@ -1064,8 +1259,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } // Eliminate undefs. - if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { - DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); return false; // Not coalescable. @@ -1077,12 +1271,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI)); + const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + LiveQueryResult LRQ = LI.Query(CopyIdx); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no <undef> flag."); assert(ReadVNI != DefVNI && "Cannot read and define the same value."); LI.MergeValueNumberInto(DefVNI, ReadVNI); + + // Process subregister liveranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveQueryResult SLRQ = S.Query(CopyIdx); + if (VNInfo *SDefVNI = SLRQ.valueDefined()) { + VNInfo *SReadVNI = SLRQ.valueIn(); + S.MergeValueNumberInto(SDefVNI, SReadVNI); + } + } DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } LIS->RemoveMachineInstrFromMaps(CopyMI); @@ -1106,9 +1310,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return false; } } else { + // When possible, let DstReg be the larger interval. + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) + CP.flip(); + DEBUG({ - dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() - << " with "; + dbgs() << "\tConsidering merging to " + << TRI->getRegClassName(CP.getNewRC()) << " with "; if (CP.getDstIdx() && CP.getSrcIdx()) dbgs() << PrintReg(CP.getDstReg()) << " in " << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " @@ -1118,13 +1327,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; }); - - // When possible, let DstReg be the larger interval. - if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > - LIS->getInterval(CP.getDstReg()).size()) - CP.flip(); } + ShrinkMask = 0; + ShrinkMainRange = false; + // Okay, attempt to join these two intervals. On failure, this returns false. // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. 
The output "SrcInt" will not have @@ -1179,6 +1386,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + // Shrink subregister ranges if necessary. + if (ShrinkMask != 0) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & ShrinkMask) == 0) + continue; + DEBUG(dbgs() << "Shrink LaneUses (Lane " + << format("%04X", S.LaneMask) << ")\n"); + LIS->shrinkToUses(S, LI.reg); + } + } + if (ShrinkMainRange) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + LIS->shrinkToUses(&LI); + } + // SrcReg is guaranteed to be the register whose live interval that is // being merged. LIS->removeInterval(CP.getSrcReg()); @@ -1187,7 +1410,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - dbgs() << "\tJoined. Result = "; + dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) + << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; + dbgs() << "\tResult = "; if (CP.isPhys()) dbgs() << PrintReg(CP.getDstReg(), TRI); else @@ -1305,13 +1530,26 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { namespace { /// Track information about values in a single virtual register about to be /// joined. Objects of this class are always created in pairs - one for each -/// side of the CoalescerPair. +/// side of the CoalescerPair (or one for each lane of a side of the coalescer +/// pair) class JoinVals { - LiveInterval &LI; - - // Location of this register in the final joined register. - // Either CP.DstIdx or CP.SrcIdx. - unsigned SubIdx; + /// Live range we work on. + LiveRange &LR; + /// (Main) register we work on. + const unsigned Reg; + + // Reg (and therefore the values in this liverange) will end up as subregister + // SubIdx in the coalesced register. Either CP.DstIdx or CP.SrcIdx. + const unsigned SubIdx; + // The LaneMask that this liverange will occupy the coalesced register. May be + // smaller than the lanemask produced by SubIdx when merging subranges. + const unsigned LaneMask; + + /// This is true when joining sub register ranges, false when joining main + /// ranges. + const bool SubRangeJoin; + /// Whether the current LiveInterval tracks subregister liveness. + const bool TrackSubRegLiveness; // Values that will be present in the final live range. SmallVectorImpl<VNInfo*> &NewVNInfo; @@ -1402,27 +1640,28 @@ class JoinVals { // One entry per value number in LI. 
SmallVector<Val, 8> Vals;
 
-  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
-  VNInfo *stripCopies(VNInfo *VNI);
+  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
+  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
+  bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other)
+    const;
   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
   void computeAssignment(unsigned ValNo, JoinVals &Other);
   bool taintExtent(unsigned, unsigned, JoinVals&,
                    SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
-  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
+  bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 
 public:
-  JoinVals(LiveInterval &li, unsigned subIdx,
-           SmallVectorImpl<VNInfo*> &newVNInfo,
-           const CoalescerPair &cp,
-           LiveIntervals *lis,
-           const TargetRegisterInfo *tri)
-    : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
-      Indexes(LIS->getSlotIndexes()), TRI(tri),
-      Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
+  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+           SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
+           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
+           bool TrackSubRegLiveness)
+    : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
+      SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
+      NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
+      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums())
   {}
 
-  /// Analyze defs in LI and compute a value mapping in NewVNInfo.
+  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
   /// Returns false if any conflicts were impossible to resolve.
   bool mapValues(JoinVals &Other);
 
@@ -1430,16 +1669,22 @@ public:
   /// Returns false if any conflicts were impossible to resolve.
   bool resolveConflicts(JoinVals &Other);
 
-  /// Prune the live range of values in Other.LI where they would conflict with
-  /// CR_Replace values in LI. Collect end points for restoring the live range
+  /// Prune the live range of values in Other.LR where they would conflict with
+  /// CR_Replace values in LR. Collect end points for restoring the live range
   /// after joining.
-  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
+  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
+                   bool changeInstrs);
+
+  // Removes subranges starting at copies that get removed. This sometimes
+  // happens when undefined subranges are copied around. These ranges contain
+  // no useful information and can be removed.
+  void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
 
   /// Erase any machine instructions that have been coalesced away.
   /// Add erased instructions to ErasedInstrs.
   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
   /// the erased instrs.
-  void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                    SmallVectorImpl<unsigned> &ShrinkRegs);
 
   /// Get the value assignments suitable for passing to LiveInterval::join.
@@ -1450,10 +1695,11 @@ public:
 /// Compute the bitmask of lanes actually written by DefMI.
 /// Set Redef if there are any partial register definitions that depend on the
 /// previous value of the register.
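/// (Illustration, not tied to a particular target: if subregister indices
/// ssub0 and ssub1 map to lane masks 0x1 and 0x2, a def of %vreg:ssub1
/// contributes only lane 0x2, a full def of %vreg contributes 0x3, and a
/// partial redef of %vreg:ssub1 additionally sets Redef because it depends
/// on the old value of the remaining lanes.)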
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { +unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) + const { unsigned L = 0; for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { - if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) + if (!MO->isReg() || MO->getReg() != Reg || !MO->isDef()) continue; L |= TRI->getSubRegIndexLaneMask( TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); @@ -1464,21 +1710,64 @@ unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { } /// Find the ultimate value that VNI was copied from. -VNInfo *JoinVals::stripCopies(VNInfo *VNI) { +std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( + const VNInfo *VNI) const { + unsigned Reg = this->Reg; + while (!VNI->isPHIDef()) { - MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); + SlotIndex Def = VNI->def; + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No defining instruction"); if (!MI->isFullCopy()) + return std::make_pair(VNI, Reg); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return std::make_pair(VNI, Reg); + + const LiveInterval &LI = LIS->getInterval(SrcReg); + const VNInfo *ValueIn; + // No subrange involved. + if (!SubRangeJoin || !LI.hasSubRanges()) { + LiveQueryResult LRQ = LI.Query(Def); + ValueIn = LRQ.valueIn(); + } else { + // Query subranges. Pick the first matching one. + ValueIn = nullptr; + for (const LiveInterval::SubRange &S : LI.subranges()) { + // Transform lanemask to a mask in the joined live interval. + unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); + if ((SMask & LaneMask) == 0) + continue; + LiveQueryResult LRQ = S.Query(Def); + ValueIn = LRQ.valueIn(); + break; + } + } + if (ValueIn == nullptr) break; - unsigned Reg = MI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - break; - LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def); - if (!LRQ.valueIn()) - break; - VNI = LRQ.valueIn(); + VNI = ValueIn; + Reg = SrcReg; } - return VNI; + return std::make_pair(VNI, Reg); +} + +bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, + const JoinVals &Other) const { + const VNInfo *Orig0; + unsigned Reg0; + std::tie(Orig0, Reg0) = followCopyChain(Value0); + if (Orig0 == Value1) + return true; + + const VNInfo *Orig1; + unsigned Reg1; + std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + + // The values are equal if they are defined at the same place and use the + // same register. Note that we cannot compare VNInfos directly as some of + // them might be from a copy created in mergeSubRangeInto() while the other + // is from the original LiveInterval. + return Orig0->def == Orig1->def && Reg0 == Reg1; } /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. @@ -1492,7 +1781,7 @@ JoinVals::ConflictResolution JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { Val &V = Vals[ValNo]; assert(!V.isAnalyzed() && "Value has already been analyzed!"); - VNInfo *VNI = LI.getValNumInfo(ValNo); + VNInfo *VNI = LR.getValNumInfo(ValNo); if (VNI->isUnused()) { V.WriteLanes = ~0u; return CR_Keep; @@ -1502,46 +1791,56 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. - V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); + unsigned Lanes = SubRangeJoin ? 
1 : TRI->getSubRegIndexLaneMask(SubIdx); + V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); - bool Redef = false; - V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); - - // If this is a read-modify-write instruction, there may be more valid - // lanes than the ones written by this instruction. - // This only covers partial redef operands. DefMI may have normal use - // operands reading the register. They don't contribute valid lanes. - // - // This adds ssub1 to the set of valid lanes in %src: - // - // %src:ssub1<def> = FOO - // - // This leaves only ssub1 valid, making any other lanes undef: - // - // %src:ssub1<def,read-undef> = FOO %src:ssub2 - // - // The <read-undef> flag on the def operand means that old lane values are - // not important. - if (Redef) { - V.RedefVNI = LI.Query(VNI->def).valueIn(); - assert(V.RedefVNI && "Instruction is reading nonexistent value"); - computeAssignment(V.RedefVNI->id, Other); - V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; - } + assert(DefMI != nullptr); + if (SubRangeJoin) { + // We don't care about the lanes when joining subregister ranges. + V.ValidLanes = V.WriteLanes = 1; + } else { + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. + if (Redef) { + V.RedefVNI = LR.Query(VNI->def).valueIn(); + assert((TrackSubRegLiveness || V.RedefVNI) && + "Instruction is reading nonexistent value"); + if (V.RedefVNI != nullptr) { + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } + } - // An IMPLICIT_DEF writes undef values. - if (DefMI->isImplicitDef()) { - // We normally expect IMPLICIT_DEF values to be live only until the end - // of their block. If the value is really live longer and gets pruned in - // another block, this flag is cleared again. - V.ErasableImplicitDef = true; - V.ValidLanes &= ~V.WriteLanes; + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) { + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; + V.ValidLanes &= ~V.WriteLanes; + } } } // Find the value in Other that overlaps VNI->def, if any. - LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def); + LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def); // It is possible that both values are defined by the same instruction, or // the values are PHIs defined in the same block. When that happens, the two @@ -1611,8 +1910,14 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { return CR_Replace; // Check for simple erasable conflicts. - if (DefMI->isImplicitDef()) + if (DefMI->isImplicitDef()) { + // We need the def for the subregister if there is nothing else live at the + // subrange at this point. 
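+    // (That is: when none of the lanes this IMPLICIT_DEF writes are valid or
+    // written in the other value, the IMPLICIT_DEF is the only definition of
+    // those lanes and must be kept, so we answer CR_Replace rather than
+    // CR_Erase.)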
+      if (TrackSubRegLiveness
+          && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0)
+        return CR_Replace;
     return CR_Erase;
+  }
 
   // Include the non-conflict where DefMI is a coalescable copy that kills
   // OtherVNI. We still want the copy erased and value numbers merged.
@@ -1633,8 +1938,8 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   //   %other = COPY %ext
   //   %this  = COPY %ext <-- Erase this copy
   //
-  if (DefMI->isFullCopy() && !CP.isPartial() &&
-      stripCopies(VNI) == stripCopies(V.OtherVNI))
+  if (DefMI->isFullCopy() && !CP.isPartial()
+      && valuesIdentical(VNI, V.OtherVNI, Other))
     return CR_Erase;
 
   // If the lanes written by this instruction were all undef in OtherVNI, it is
@@ -1669,7 +1974,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   // VNI is clobbering live lanes in OtherVNI, but there is still the
   // possibility that no instructions actually read the clobbered lanes.
   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
-  // Otherwise Other.LI wouldn't be live here.
+  // Otherwise Other.LR wouldn't be live here.
   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
     return CR_Impossible;
 
@@ -1690,7 +1995,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   return CR_Unresolved;
 }
 
-/// Compute the value assignment for ValNo in LI.
+/// Compute the value assignment for ValNo in LR.
 /// This may be called recursively by analyzeValue(), but never for a ValNo on
 /// the stack.
 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
@@ -1708,42 +2013,48 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
-    DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
-                 << LI.getValNumInfo(ValNo)->def << " into "
-                 << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
+    DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@'
+                 << LR.getValNumInfo(ValNo)->def << " into "
+                 << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
                  << V.OtherVNI->def << " --> @"
                  << NewVNInfo[Assignments[ValNo]]->def << '\n');
     break;
   case CR_Replace:
-  case CR_Unresolved:
+  case CR_Unresolved: {
     // The other value is going to be pruned if this join is successful.
     assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
-    Other.Vals[V.OtherVNI->id].Pruned = true;
+    Val &OtherV = Other.Vals[V.OtherVNI->id];
+    // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
+    // its lanes.
+    if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness)
+      OtherV.ErasableImplicitDef = false;
+    OtherV.Pruned = true;
+  }
     // Fall through.
   default:
     // This value number needs to go in the final joined live range.
Assignments[ValNo] = NewVNInfo.size(); - NewVNInfo.push_back(LI.getValNumInfo(ValNo)); + NewVNInfo.push_back(LR.getValNumInfo(ValNo)); break; } } bool JoinVals::mapValues(JoinVals &Other) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { computeAssignment(i, Other); if (Vals[i].Resolution == CR_Impossible) { - DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i - << '@' << LI.getValNumInfo(i)->def << '\n'); + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); return false; } } return true; } -/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute +/// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute /// the extent of the tainted lanes in the block. /// -/// Multiple values in Other.LI can be affected since partial redefinitions can +/// Multiple values in Other.LR can be affected since partial redefinitions can /// preserve previously tainted lanes. /// /// 1 %dst = VLOAD <-- Define all lanes in %dst @@ -1758,23 +2069,23 @@ bool JoinVals::mapValues(JoinVals &Other) { bool JoinVals:: taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { - VNInfo *VNI = LI.getValNumInfo(ValNo); + VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); - // Scan Other.LI from VNI.def to MBBEnd. - LiveInterval::iterator OtherI = Other.LI.find(VNI->def); - assert(OtherI != Other.LI.end() && "No conflict?"); + // Scan Other.LR from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LR.find(VNI->def); + assert(OtherI != Other.LR.end() && "No conflict?"); do { // OtherI is pointing to a tainted value. Abort the join if the tainted // lanes escape the block. SlotIndex End = OtherI->end; if (End >= MBBEnd) { - DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << '\n'); return false; } - DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':' << OtherI->valno->id << '@' << OtherI->start << " to " << End << '\n'); // A dead def is not a problem. @@ -1783,7 +2094,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, TaintExtent.push_back(std::make_pair(End, TaintedLanes)); // Check for another def in the MBB. - if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) + if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd) break; // Lanes written by the new def are no longer tainted. @@ -1797,8 +2108,8 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. 
-bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) { +bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, + unsigned Lanes) const { if (MI->isDebugValue()) return false; for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { @@ -1814,16 +2125,19 @@ bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, } bool JoinVals::resolveConflicts(JoinVals &Other) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { Val &V = Vals[i]; assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); if (V.Resolution != CR_Unresolved) continue; - DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i - << '@' << LI.getValNumInfo(i)->def << '\n'); + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); + if (SubRangeJoin) + return false; + ++NumLaneConflicts; assert(V.OtherVNI && "Inconsistent conflict resolution."); - VNInfo *VNI = LI.getValNumInfo(i); + VNInfo *VNI = LR.getValNumInfo(i); const Val &OtherV = Other.Vals[V.OtherVNI->id]; // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the @@ -1853,7 +2167,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { unsigned TaintNum = 0; for(;;) { assert(MI != MBB->end() && "Bad LastMI"); - if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { + if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; } @@ -1875,7 +2189,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { return true; } -// Determine if ValNo is a copy of a value number in LI or Other.LI that will +// Determine if ValNo is a copy of a value number in LR or Other.LR that will // be pruned: // // %dst = COPY %src @@ -1898,15 +2212,16 @@ bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { } void JoinVals::pruneValues(JoinVals &Other, - SmallVectorImpl<SlotIndex> &EndPoints) { - for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { - SlotIndex Def = LI.getValNumInfo(i)->def; + SmallVectorImpl<SlotIndex> &EndPoints, + bool changeInstrs) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + SlotIndex Def = LR.getValNumInfo(i)->def; switch (Vals[i].Resolution) { case CR_Keep: break; case CR_Replace: { - // This value takes precedence over the value in Other.LI. - LIS->pruneValue(&Other.LI, Def, &EndPoints); + // This value takes precedence over the value in Other.LR. + LIS->pruneValue(Other.LR, Def, &EndPoints); // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF // instructions are only inserted to provide a live-out value for PHI // predecessors, so the instruction should simply go away once its value @@ -1915,34 +2230,37 @@ void JoinVals::pruneValues(JoinVals &Other, bool EraseImpDef = OtherV.ErasableImplicitDef && OtherV.Resolution == CR_Keep; if (!Def.isBlock()) { - // Remove <def,read-undef> flags. This def is now a partial redef. - // Also remove <def,dead> flags since the joined live range will - // continue past this instruction. - for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); - MO.isValid(); ++MO) - if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { - MO->setIsUndef(EraseImpDef); - MO->setIsDead(false); + if (changeInstrs) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. 
+        for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
+             MO.isValid(); ++MO) {
+          if (MO->isReg() && MO->isDef() && MO->getReg() == Reg) {
+            MO->setIsUndef(EraseImpDef);
+            MO->setIsDead(false);
+          }
         }
+      }
       // This value will reach instructions below, but we need to make sure
       // the live range also reaches the instruction at Def.
       if (!EraseImpDef)
         EndPoints.push_back(Def);
     }
-    DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
-                 << ": " << Other.LI << '\n');
+    DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def
+                 << ": " << Other.LR << '\n');
     break;
   }
   case CR_Erase:
   case CR_Merge:
     if (isPrunedValue(i, Other)) {
-      // This value is ultimately a copy of a pruned value in LI or Other.LI.
+      // This value is ultimately a copy of a pruned value in LR or Other.LR.
       // We can no longer trust the value mapping computed by
       // computeAssignment(), the value that was originally copied could have
      // been replaced.
-      LIS->pruneValue(&LI, Def, &EndPoints);
-      DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
-                   << Def << ": " << LI << '\n');
+      LIS->pruneValue(LR, Def, &EndPoints);
+      DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at "
+                   << Def << ": " << LR << '\n');
     }
     break;
   case CR_Unresolved:
@@ -1952,11 +2270,49 @@ void JoinVals::pruneValues(JoinVals &Other,
   }
 }
 
-void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
+void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
+{
+  // Look for values being erased.
+  bool DidPrune = false;
+  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+    if (Vals[i].Resolution != CR_Erase)
+      continue;
+
+    // Check subranges at the point where the copy will be removed.
+    SlotIndex Def = LR.getValNumInfo(i)->def;
+    for (LiveInterval::SubRange &S : LI.subranges()) {
+      LiveQueryResult Q = S.Query(Def);
+
+      // If a subrange starts at the copy then an undefined value has been
+      // copied and we must remove that subrange value as well.
+      VNInfo *ValueOut = Q.valueOutOrDead();
+      if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+        DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask)
+                     << " at " << Def << "\n");
+        LIS->pruneValue(S, Def, nullptr);
+        DidPrune = true;
+        // Mark value number as unused.
+        ValueOut->markUnused();
+        continue;
+      }
+      // If a subrange ends at the copy, then a value was copied but only
+      // partially used later. Shrink the subregister range appropriately.
+      if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
+        DEBUG(dbgs() << "\t\tDead uses at sublane "
+                     << format("%04X", S.LaneMask) << " at " << Def << "\n");
+        ShrinkMask |= S.LaneMask;
+      }
+    }
+  }
+  if (DidPrune)
+    LI.removeEmptySubRanges();
+}
+
+void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
                            SmallVectorImpl<unsigned> &ShrinkRegs) {
-  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
+  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
     // Get the def location before markUnused() below invalidates it.
-    SlotIndex Def = LI.getValNumInfo(i)->def;
+    SlotIndex Def = LR.getValNumInfo(i)->def;
     switch (Vals[i].Resolution) {
     case CR_Keep:
       // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
      // longer. The IMPLICIT_DEF instructions are only inserted by
      // PHIElimination to guarantee that all PHI predecessors have a value.
       if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
         break;
-      // Remove value number i from LI.
Note that this VNInfo is still present + // Remove value number i from LR. Note that this VNInfo is still present // in NewVNInfo, so it will appear as an unused value number in the final // joined interval. - LI.getValNumInfo(i)->markUnused(); - LI.removeValNo(LI.getValNumInfo(i)); - DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); + LR.getValNumInfo(i)->markUnused(); + LR.removeValNo(LR.getValNumInfo(i)); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); // FALL THROUGH. case CR_Erase: { @@ -1993,12 +2349,96 @@ void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, } } +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + unsigned LaneMask, + const CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + + /// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) + /// Conflicts should already be resolved so the mapping/resolution should + /// always succeed. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + llvm_unreachable("Can't join subrange although main ranges are compatible"); + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + llvm_unreachable("Can't join subrange although main ranges are compatible"); + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints, false); + RHSVals.pruneValues(LHSVals, EndPoints, false); + + LRange.verify(); + RRange.verify(); + + // Join RRange into LHS. + LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), + NewVNInfo); + + DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); + if (EndPoints.empty()) + return; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LRange << '\n'); + LIS->extendToIndices(LRange, EndPoints); +} + +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, + const LiveRange &ToMerge, + unsigned LaneMask, CoalescerPair &CP) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + for (LiveInterval::SubRange &R : LI.subranges()) { + unsigned RMask = R.LaneMask; + // LaneMask of subregisters common to subrange R and ToMerge. + unsigned Common = RMask & LaneMask; + // There is nothing to do without common subregs. + if (Common == 0) + continue; + + DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + // LaneMask of subregisters contained in the R range but not in ToMerge, + // they have to split into their own subrange. + unsigned LRest = RMask & ~LaneMask; + LiveInterval::SubRange *CommonRange; + if (LRest != 0) { + R.LaneMask = LRest; + DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + // Duplicate SubRange for newly merged common stuff. + CommonRange = LI.createSubRangeFrom(Allocator, Common, R); + } else { + // Reuse the existing range. 
+ R.LaneMask = Common; + CommonRange = &R; + } + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + LaneMask &= ~RMask; + } + + if (LaneMask != 0) { + DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); + } +} + bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { SmallVector<VNInfo*, 16> NewVNInfo; LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); - JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); - JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); + bool TrackSubRegLiveness = MRI->tracksSubRegLiveness(); + JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); + JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS @@ -2014,14 +2454,55 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return false; // All clear, the live ranges can be merged. + if (RHS.hasSubRanges() || LHS.hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + + // Transform lanemasks from the LHS to masks in the coalesced register and + // create initial subranges if necessary. + unsigned DstIdx = CP.getDstIdx(); + if (!LHS.hasSubRanges()) { + unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); + // LHS must support subregs or we wouldn't be in this codepath. + assert(Mask != 0); + LHS.createSubRangeFrom(Allocator, Mask, LHS); + } else if (DstIdx != 0) { + // Transform LHS lanemasks to new register class if necessary. + for (LiveInterval::SubRange &R : LHS.subranges()) { + unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + R.LaneMask = Mask; + } + } + DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg()) + << ' ' << LHS << '\n'); + + // Determine lanemasks of RHS in the coalesced register and merge subranges. + unsigned SrcIdx = CP.getSrcIdx(); + if (!RHS.hasSubRanges()) { + unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); + } else { + // Pair up subranges and merge. + for (LiveInterval::SubRange &R : RHS.subranges()) { + unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); + } + } + + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } // The merging algorithm in LiveInterval::join() can't handle conflicting // value mappings, so we need to remove any live ranges that overlap a // CR_Replace resolution. Collect a set of end points that can be used to // restore the live range after joining. SmallVector<SlotIndex, 8> EndPoints; - LHSVals.pruneValues(RHSVals, EndPoints); - RHSVals.pruneValues(LHSVals, EndPoints); + LHSVals.pruneValues(RHSVals, EndPoints, true); + RHSVals.pruneValues(LHSVals, EndPoints, true); // Erase COPY and IMPLICIT_DEF instructions. This may cause some external // registers to require trimming. 
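As an aside on the lane-mask bookkeeping in mergeSubRangeInto() above: the split into a common part and a remainder is plain bit arithmetic. A minimal standalone sketch with hypothetical mask values (not the LLVM API itself):

    #include <cstdio>

    int main() {
      // Hypothetical lane masks: an existing subrange R covers lanes 0-3,
      // the incoming range (ToMerge) covers lanes 2-5.
      unsigned RMask    = 0x000F;
      unsigned LaneMask = 0x003C;

      unsigned Common = RMask & LaneMask;  // 0x000C: joined value by value
      unsigned LRest  = RMask & ~LaneMask; // 0x0003: stays in the old subrange
      unsigned Fresh  = LaneMask & ~RMask; // 0x0030: becomes a new subrange
                                           // copied from ToMerge
      std::printf("common %04X rest %04X new %04X\n", Common, LRest, Fresh);
      return 0;
    }

The Common and LRest values mirror the computation in the loop above; Fresh corresponds to the final "if (LaneMask != 0)" case once all existing subranges have been visited.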
@@ -2040,19 +2521,18 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { MRI->clearKillFlags(LHS.reg); MRI->clearKillFlags(RHS.reg); - if (EndPoints.empty()) - return true; + if (!EndPoints.empty()) { + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices((LiveRange&)LHS, EndPoints); + } - // Recompute the parts of the live range we had to remove because of - // CR_Replace conflicts. - DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() - << " points: " << LHS << '\n'); - LIS->extendToIndices(LHS, EndPoints); return true; } -/// joinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. +/// Attempt to join these two intervals. On failure, this returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } @@ -2224,8 +2704,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); TM = &fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = TM->getSubtargetImpl()->getRegisterInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); LIS = &getAnalysis<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); Loops = &getAnalysis<MachineLoopInfo>(); @@ -2266,7 +2746,22 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { continue; if (MRI->recomputeRegClass(Reg, *TM)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " - << MRI->getRegClass(Reg)->getName() << '\n'); + << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); + LiveInterval &LI = LIS->getInterval(Reg); + unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + if (MaxMask == 0) { + // If the inflated register class does not support subregisters anymore + // remove the subranges. + LI.clearSubRanges(); + } else { + // If subranges are still supported, then the same subregs should still + // be supported. +#ifndef NDEBUG + for (LiveInterval::SubRange &S : LI.subranges()) { + assert ((S.LaneMask & ~MaxMask) == 0); + } +#endif + } ++NumInflated; } } @@ -2277,7 +2772,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { return true; } -/// print - Implement the dump method. +/// Implement the dump method. void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { LIS->print(O, m); } diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index e57ceab37d0d..04067a1427af 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H -#define LLVM_CODEGEN_REGISTER_COALESCER_H +#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H +#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H namespace llvm { @@ -22,38 +22,36 @@ namespace llvm { class TargetRegisterClass; class TargetInstrInfo; - /// CoalescerPair - A helper class for register coalescers. When deciding if + /// A helper class for register coalescers. When deciding if /// two registers can be coalesced, CoalescerPair can determine if a copy /// instruction would become an identity copy after coalescing. class CoalescerPair { const TargetRegisterInfo &TRI; - /// DstReg - The register that will be left after coalescing. It can be a + /// The register that will be left after coalescing. 
It can be a /// virtual or physical register. unsigned DstReg; - /// SrcReg - the virtual register that will be coalesced into dstReg. + /// The virtual register that will be coalesced into dstReg. unsigned SrcReg; - /// DstIdx - The sub-register index of the old DstReg in the new coalesced - /// register. + /// The sub-register index of the old DstReg in the new coalesced register. unsigned DstIdx; - /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced - /// register. + /// The sub-register index of the old SrcReg in the new coalesced register. unsigned SrcIdx; - /// Partial - True when the original copy was a partial subregister copy. + /// True when the original copy was a partial subregister copy. bool Partial; - /// CrossClass - True when both regs are virtual, and newRC is constrained. + /// True when both regs are virtual and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the original + /// True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; - /// NewRC - The register class of the coalesced register, or NULL if DstReg + /// The register class of the coalesced register, or NULL if DstReg /// is a physreg. This register class may be a super-register of both /// SrcReg and DstReg. const TargetRegisterClass *NewRC; @@ -70,49 +68,47 @@ namespace llvm { : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} - /// setRegisters - set registers to match the copy instruction MI. Return + /// Set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. bool setRegisters(const MachineInstr*); - /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible + /// Swap SrcReg and DstReg. Return false if swapping is impossible /// because DstReg is a physical register, or SubIdx is set. bool flip(); - /// isCoalescable - Return true if MI is a copy instruction that will become + /// Return true if MI is a copy instruction that will become /// an identity copy after coalescing. bool isCoalescable(const MachineInstr*) const; - /// isPhys - Return true if DstReg is a physical register. + /// Return true if DstReg is a physical register. bool isPhys() const { return !NewRC; } - /// isPartial - Return true if the original copy instruction did not copy + /// Return true if the original copy instruction did not copy /// the full register, but was a subreg operation. bool isPartial() const { return Partial; } - /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller + /// Return true if DstReg is virtual and NewRC is a smaller /// register class than DstReg's. bool isCrossClass() const { return CrossClass; } - /// isFlipped - Return true when getSrcReg is the register being defined by + /// Return true when getSrcReg is the register being defined by /// the original copy instruction. bool isFlipped() const { return Flipped; } - /// getDstReg - Return the register (virtual or physical) that will remain + /// Return the register (virtual or physical) that will remain /// after coalescing. unsigned getDstReg() const { return DstReg; } - /// getSrcReg - Return the virtual register that will be coalesced away. + /// Return the virtual register that will be coalesced away. unsigned getSrcReg() const { return SrcReg; } - /// getDstIdx - Return the subregister index that DstReg will be coalesced - /// into, or 0. 
+ /// Return the subregister index that DstReg will be coalesced into, or 0. unsigned getDstIdx() const { return DstIdx; } - /// getSrcIdx - Return the subregister index that SrcReg will be coalesced - /// into, or 0. + /// Return the subregister index that SrcReg will be coalesced into, or 0. unsigned getSrcIdx() const { return SrcIdx; } - /// getNewRC - Return the register class of the coalesced register. + /// Return the register class of the coalesced register. const TargetRegisterClass *getNewRC() const { return NewRC; } }; } // End llvm namespace diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 617e45989c66..9925efbff24b 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -185,7 +184,7 @@ void RegPressureTracker::init(const MachineFunction *mf, reset(); MF = mf; - TRI = MF->getTarget().getRegisterInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 72b628589e47..7626dd29c00a 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -24,24 +24,16 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "reg-scavenging" -/// setUsed - Set the register and its sub-registers as being used. -void RegScavenger::setUsed(unsigned Reg) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - RegsAvailable.reset(*SubRegs); -} - -bool RegScavenger::isAliasUsed(unsigned Reg) const { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - if (isUsed(*AI, *AI == Reg)) - return true; - return false; +/// setUsed - Set the register units of this register as used. +void RegScavenger::setRegUsed(unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + RegUnitsAvailable.reset(*RUI); } void RegScavenger::initRegState() { @@ -51,8 +43,8 @@ void RegScavenger::initRegState() { I->Restore = nullptr; } - // All registers started out unused. - RegsAvailable.set(); + // All register units start out unused. + RegUnitsAvailable.set(); if (!MBB) return; @@ -60,22 +52,21 @@ void RegScavenger::initRegState() { // Live-in registers are in use. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - setUsed(*I); + setRegUsed(*I); // Pristine CSRs are also unavailable. 
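  // (Pristine registers are callee-saved registers that no instruction in
  // the function mentions; they still hold values the caller expects to be
  // preserved, so the scavenger must not hand them out either.)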
BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) - setUsed(I); + setRegUsed(I); } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { MachineFunction &MF = *mbb->getParent(); - const TargetMachine &TM = MF.getTarget(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) && "Target changed?"); // It is not possible to use the register scavenger after late optimization @@ -85,17 +76,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { // Self-initialize. if (!MBB) { - NumPhysRegs = TRI->getNumRegs(); - RegsAvailable.resize(NumPhysRegs); - KillRegs.resize(NumPhysRegs); - DefRegs.resize(NumPhysRegs); - - // Create callee-saved registers bitvector. - CalleeSavedRegs.resize(NumPhysRegs); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); - if (CSRegs != nullptr) - for (unsigned i = 0; CSRegs[i]; ++i) - CalleeSavedRegs.set(CSRegs[i]); + NumRegUnits = TRI->getNumRegUnits(); + RegUnitsAvailable.resize(NumRegUnits); + KillRegUnits.resize(NumRegUnits); + DefRegUnits.resize(NumRegUnits); + TmpRegUnits.resize(NumRegUnits); } MBB = mbb; @@ -104,10 +89,9 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { Tracking = false; } -void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - BV.set(*SubRegs); +void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + BV.set(*RUI); } void RegScavenger::determineKillsAndDefs() { @@ -122,12 +106,25 @@ void RegScavenger::determineKillsAndDefs() { // predicated, conservatively assume "kill" markers do not actually kill the // register. Similarly ignores "dead" markers. bool isPred = TII->isPredicated(MI); - KillRegs.reset(); - DefRegs.reset(); + KillRegUnits.reset(); + DefRegUnits.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); + if (MO.isRegMask()) { + + TmpRegUnits.clear(); + for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) { + for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { + if (MO.clobbersPhysReg(*RURI)) { + TmpRegUnits.set(RU); + break; + } + } + } + + // Apply the mask. + (isPred ? DefRegUnits : KillRegUnits) |= TmpRegUnits; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -139,13 +136,13 @@ void RegScavenger::determineKillsAndDefs() { if (MO.isUndef()) continue; if (!isPred && MO.isKill()) - addRegWithSubRegs(KillRegs, Reg); + addRegUnits(KillRegUnits, Reg); } else { assert(MO.isDef()); if (!isPred && MO.isDead()) - addRegWithSubRegs(KillRegs, Reg); + addRegUnits(KillRegUnits, Reg); else - addRegWithSubRegs(DefRegs, Reg); + addRegUnits(DefRegUnits, Reg); } } } @@ -158,8 +155,8 @@ void RegScavenger::unprocess() { determineKillsAndDefs(); // Commit the changes. 
- setUsed(KillRegs); - setUnused(DefRegs); + setUsed(KillRegUnits); + setUnused(DefRegUnits); } if (MBBI == MBB->begin()) { @@ -208,7 +205,7 @@ void RegScavenger::forward() { if (MO.isUse()) { if (MO.isUndef()) continue; - if (!isUsed(Reg)) { + if (!isRegUsed(Reg)) { // Check if it's partial live: e.g. // D0 = insert_subreg D0<undef>, S0 // ... D0 @@ -219,15 +216,23 @@ void RegScavenger::forward() { // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - if (isUsed(*SubRegs)) { + if (isRegUsed(*SubRegs)) { SubUsed = true; break; } - if (!SubUsed) { + bool SuperUsed = false; + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { + if (isRegUsed(*SR)) { + SuperUsed = true; + break; + } + } + if (!SubUsed && !SuperUsed) { MBB->getParent()->verify(nullptr, "In Register Scavenger"); llvm_unreachable("Using an undefined register!"); } (void)SubUsed; + (void)SuperUsed; } } else { assert(MO.isDef()); @@ -243,23 +248,23 @@ void RegScavenger::forward() { #endif // NDEBUG // Commit the changes. - setUnused(KillRegs); - setUsed(DefRegs); + setUnused(KillRegUnits); + setUsed(DefRegUnits); } -void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { - used = RegsAvailable; - used.flip(); - if (includeReserved) - used |= MRI->getReservedRegs(); - else - used.reset(MRI->getReservedRegs()); +bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { + if (includeReserved && isReserved(Reg)) + return true; + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + if (!RegUnitsAvailable.test(*RUI)) + return true; + return false; } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) { + if (!isRegUsed(*I)) { DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << "\n"); return *I; @@ -273,13 +278,13 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) + if (!isRegUsed(*I)) Mask.set(*I); return Mask; } /// findSurvivorReg - Return the candidate register that is unused for the -/// longest after StargMII. UseMI is set to the instruction where the search +/// longest after StartMII. UseMI is set to the instruction where the search /// stopped. /// /// No more than InstrLimit instructions are inspected. @@ -375,9 +380,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Try to find a register that's unused if there is one, as then we won't - // have to spill. Search explicitly rather than masking out based on - // RegsAvailable, as RegsAvailable does not take aliases into account. - // That's what getRegsAvailable() is for. + // have to spill. BitVector Available = getRegsAvailable(RC); Available &= Candidates; if (Available.any()) @@ -388,7 +391,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. 
-  if (!isAliasUsed(SReg)) {
+  if (!isRegUsed(SReg)) {
     DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
     return SReg;
   }
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6a2a08096461..6f8b337c6512 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <climits>
 using namespace llvm;
@@ -35,11 +36,9 @@ static cl::opt<bool> StressSchedOpt(
 void SchedulingPriorityQueue::anchor() { }
 ScheduleDAG::ScheduleDAG(MachineFunction &mf)
-  : TM(mf.getTarget()),
-    TII(TM.getInstrInfo()),
-    TRI(TM.getRegisterInfo()),
-    MF(mf), MRI(mf.getRegInfo()),
-    EntrySU(), ExitSU() {
+    : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()),
+      TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf),
+      MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
 #ifndef NDEBUG
   StressSched = StressSchedOpt;
 #endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 0f8b21c1c1f3..409e7041907f 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -44,18 +44,17 @@ using namespace llvm;
 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
     cl::ZeroOrMore, cl::init(false),
-    cl::desc("Enable use of AA during MI GAD construction"));
+    cl::desc("Enable use of AA during MI DAG construction"));
 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
-    cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction"));
+    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
-                                     const MachineLoopInfo &mli,
-                                     const MachineDominatorTree &mdt,
+                                     const MachineLoopInfo *mli,
                                      bool IsPostRAFlag, bool RemoveKillFlags,
                                      LiveIntervals *lis)
-  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
+  : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
     IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
     CanHandleTerminators(false), FirstDbgValue(nullptr) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
@@ -64,7 +63,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
          "Virtual registers must be removed prior to PostRA scheduling");
   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-  SchedModel.init(*ST.getSchedModel(), &ST, TII);
+  SchedModel.init(ST.getSchedModel(), &ST, TII);
 }
 /// getUnderlyingObjectFromInt - This is the function that does the work of
@@ -110,7 +109,7 @@ static void getUnderlyingObjects(const Value *V,
   for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
        I != IE; ++I) {
     V = *I;
-    if (!Visited.insert(V))
+    if (!Visited.insert(V).second)
       continue;
     if (Operator::getOpcode(V) == Instruction::IntToPtr) {
       const Value *O =
@@ -512,9 +511,18 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                              MachineInstr *MIa,
                              MachineInstr *MIb) {
+  const MachineFunction *MF = MIa->getParent()->getParent();
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
   // Cover a trivial case - no edge is needed to itself.
   if (MIa == MIb)
     return false;
+
+  // Let the target decide if memory accesses cannot possibly overlap.
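+  // (A target hook can prove this from opcode-specific facts that a generic
+  // alias query cannot see, e.g. two fixed-width accesses off the same base
+  // register whose immediate offset ranges cannot overlap.)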
+  if ((MIa->mayLoad() || MIa->mayStore()) &&
+      (MIb->mayLoad() || MIb->mayStore()))
+    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+      return false;
 
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -563,9 +571,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   AliasAnalysis::AliasResult AAResult = AA->alias(
       AliasAnalysis::Location(MMOa->getValue(), Overlapa,
-                              UseTBAA ? MMOa->getTBAAInfo() : nullptr),
+                              UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
       AliasAnalysis::Location(MMOb->getValue(), Overlapb,
-                              UseTBAA ? MMOb->getTBAAInfo() : nullptr));
+                              UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
 
   return (AAResult != AliasAnalysis::NoAlias);
 }
@@ -575,12 +583,12 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
 static unsigned
 iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                  SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
-                 SmallPtrSet<const SUnit*, 16> &Visited) {
+                 SmallPtrSetImpl<const SUnit*> &Visited) {
   if (!SUa || !SUb || SUb == ExitSU)
     return *Depth;
 
   // Remember visited nodes.
-  if (!Visited.insert(SUb))
+  if (!Visited.insert(SUb).second)
     return *Depth;
   // If there is _some_ dependency already in place, do not
   // descend any further.
@@ -606,10 +614,10 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
   }
   // Track current depth.
   (*Depth)++;
-  // Iterate over chain dependencies only.
+  // Iterate over memory dependencies only.
   for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
        I != E; ++I)
-    if (I->isCtrl())
+    if (I->isNormalMemoryOrBarrier())
       iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
   return *Depth;
 }
@@ -636,11 +644,12 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
         Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
         (*I)->addPred(Dep);
       }
-      // Now go through all the chain successors and iterate from them.
-      // Keep track of visited nodes.
+
+      // Iterate recursively over all previously added memory chain
+      // successors. Keep track of visited nodes.
       for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
            JE = (*I)->Succs.end(); J != JE; ++J)
-        if (J->isCtrl())
+        if (J->isNormalMemoryOrBarrier())
           iterateChainSucc (AA, MFI, SU, J->getSUnit(),
                             ExitSU, &Depth, Visited);
     }
@@ -656,7 +665,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
                          bool isNormalMemory = false) {
   // If this is a false dependency,
   // do not add the edge, but remember the rejected node.
-  if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+  if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
     SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
     Dep.setLatency(TrueMemOrderLatency);
     SUb->addPred(Dep);
@@ -883,7 +892,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
     // fall-through
     new_alias_chain:
-      // Chain all possibly aliasing memory references though SU.
+      // Chain all possibly aliasing memory references through SU.
       if (AliasChain) {
         unsigned ChainLatency = 0;
         if (AliasChain->getInstr()->mayLoad())
@@ -912,6 +921,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       AliasMemDefs.clear();
       AliasMemUses.clear();
     } else if (MI->mayStore()) {
+      // Add dependence on barrier chain, if needed.
+      // There is no point to check aliasing on barrier event. Even if
+      // SU and barrier _could_ be reordered, they should not.
In addition, + // we have lost all RejectMemNodes below barrier. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); @@ -981,12 +997,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } - // Add dependence on barrier chain, if needed. - // There is no point to check aliasing on barrier event. Even if - // SU and barrier _could_ be reordered, they should not. In addition, - // we have lost all RejectMemNodes below barrier. - if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Barrier)); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index f59c6cfd8284..b2e4617720ff 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <fstream> using namespace llvm; diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 004c685bf39a..38833a4165a3 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -78,7 +78,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); else { // A nonempty itinerary must have a SchedModel. - IssueWidth = ItinData->SchedModel->IssueWidth; + IssueWidth = ItinData->SchedModel.IssueWidth; DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " << ScoreboardDepth << '\n'); } diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 75e816720f57..fbedf2c1d17a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + StatepointLowering.cpp ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2abcdd524512..5145731f6231 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17,6 +17,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,7 +34,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -76,6 +77,10 @@ namespace { "slicing"), cl::init(false)); + static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -87,61 +92,70 @@ namespace { bool LegalTypes; bool ForCodeSize; - // Worklist of all of the nodes that 
need to be simplified. - // - // This has the semantics that when adding to the worklist, - // the item added must be next to be processed. It should - // also only appear once. The naive approach to this takes - // linear time. - // - // To reduce the insert/remove time to logarithmic, we use - // a set and a vector to maintain our worklist. - // - // The set contains the items on the worklist, but does not - // maintain the order they should be visited. - // - // The vector maintains the order nodes should be visited, but may - // contain duplicate or removed nodes. When choosing a node to - // visit, we pop off the order stack until we find an item that is - // also in the contents set. All operations are O(log N). - SmallPtrSet<SDNode*, 64> WorklistContents; - SmallVector<SDNode*, 64> WorklistOrder; + /// \brief Worklist of all of the nodes that need to be simplified. + /// + /// This must behave as a stack -- new nodes to process are pushed onto the + /// back and when processing we pop off of the back. + /// + /// The worklist will not contain duplicates but may contain null entries + /// due to nodes being deleted from the underlying DAG. + SmallVector<SDNode *, 64> Worklist; + + /// \brief Mapping from an SDNode to its position on the worklist. + /// + /// This is used to find and remove nodes from the worklist (by nulling + /// them) when they are deleted from the underlying DAG. It relies on + /// stable indices of nodes within the worklist. + DenseMap<SDNode *, unsigned> WorklistMap; + + /// \brief Set of nodes which have been combined (at least once). + /// + /// This is used to allow us to reliably add any operands of a DAG node + /// which have not yet been combined to the worklist. + SmallPtrSet<SDNode *, 64> CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; - /// AddUsersToWorklist - When an instruction is simplified, add all users of - /// the instruction to the work lists because they might get more simplified - /// now. - /// + /// When an instruction is simplified, add all users of the instruction to + /// the work lists because they might get more simplified now. void AddUsersToWorklist(SDNode *N) { for (SDNode *Node : N->uses()) AddToWorklist(Node); } - /// visit - call the node-specific routine that knows how to fold each - /// particular type of node. + /// Call the node-specific routine that folds each particular type of node. SDValue visit(SDNode *N); public: - /// AddToWorklist - Add to the work list making sure its instance is at the - /// back (next to be processed.) + /// Add to the worklist making sure its instance is at the back (next to be + /// processed.) void AddToWorklist(SDNode *N) { // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) return; - WorklistContents.insert(N); - WorklistOrder.push_back(N); + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + Worklist.push_back(N); } - /// removeFromWorklist - remove all instances of N from the worklist. - /// + /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { - WorklistContents.erase(N); + CombinedNodes.erase(N); + + auto It = WorklistMap.find(N); + if (It == WorklistMap.end()) + return; // Not in the worklist. + + // Null out the entry rather than erasing it to avoid a linear operation. 
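+ // (The pop loop in Run() skips these null entries when it takes nodes off + // the back of the worklist.)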
+ Worklist[It->second] = nullptr; + WorklistMap.erase(It); } + void deleteAndRecombine(SDNode *N); + bool recursivelyDeleteUnusedNodes(SDNode *N); + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); @@ -159,9 +173,9 @@ namespace { private: - /// SimplifyDemandedBits - Check the specified integer node value to see if - /// it can be simplified or if things it uses can be simplified by bit - /// propagation. If so, return true. + /// Check the specified integer node value to see if it can be simplified or + /// if things it uses can be simplified by bit propagation. + /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); @@ -172,8 +186,19 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad load that EVE came from to be replaced. + /// \returns EVE on success, SDValue() on failure. + SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -187,7 +212,7 @@ namespace { SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); - /// combine - call the node-specific routine that knows how to fold each + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines.
SDValue combine(SDNode *N); @@ -251,6 +276,7 @@ namespace { SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); + SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); @@ -264,6 +290,8 @@ namespace { SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); + SDValue visitFMINNUM(SDNode *N); + SDValue visitFMAXNUM(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -275,6 +303,8 @@ namespace { SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -299,7 +329,12 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); + SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildReciprocalEstimate(SDValue Op); + SDValue BuildRsqrtEstimate(SDValue Op); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -316,17 +351,16 @@ namespace { SDValue GetDemandedBits(SDValue V, const APInt &Mask); - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases); - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. + /// Return true if there is any possibility that the two addresses overlap. bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; - /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, - /// looking for a better chain (aliasing node.) + /// Walk up chain skipping non-aliasing memory nodes, looking for a better + /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Merge consecutive store operations into a wide store. @@ -354,13 +388,13 @@ namespace { FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); } - /// Run - runs the dag combiner on all nodes in the work list + /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); SelectionDAG &getDAG() const { return DAG; } - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. + /// Returns a type large enough to hold any valid shift amount - before type + /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) @@ -369,15 +403,14 @@ namespace { : TLI.getPointerTy(); } - /// isTypeLegal - This method returns true if we are running before type - /// legalization or if the specified VT is legal. + /// This method returns true if we are running before type legalization or + /// if the specified VT is legal. 
bool isTypeLegal(const EVT &VT) { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } - /// getSetCCResultType - Convenience wrapper around - /// TargetLowering::getSetCCResultType + /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); } @@ -386,7 +419,7 @@ namespace { namespace { -/// WorklistRemover - This class is a DAGUpdateListener that removes any deleted +/// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; @@ -437,9 +470,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Helper Functions //===----------------------------------------------------------------------===// -/// isNegatibleForFree - Return 1 if we can compute the negated form of the -/// specified expression for the same cost as the expression itself, or 2 if we -/// can compute the negated form more cheaply than the expression itself. +void DAGCombiner::deleteAndRecombine(SDNode *N) { + removeFromWorklist(N); + + // If the operands of this node are only used by the node, they will now be + // dead. Make sure to re-visit them and recursively delete dead nodes. + for (const SDValue &Op : N->ops()) + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). + if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorklist(Op.getNode()); + + DAG.DeleteNode(N); +} + +/// Return 1 if we can compute the negated form of the specified expression for +/// the same cost as the expression itself, or 2 if we can compute the negated +/// form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, @@ -502,10 +550,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, } } -/// GetNegatedExpression - If isNegatibleForFree returns true, this function -/// returns the newly negated expression. +/// If isNegatibleForFree returns true, return the newly negated expression. static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth = 0) { + const TargetOptions &Options = DAG.getTarget().Options; // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); @@ -522,12 +570,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -539,7 +586,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
- assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -552,12 +599,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); + assert(!Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -582,7 +628,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } -// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This @@ -601,15 +647,19 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, !TLI.isConstFalseVal(N.getOperand(3).getNode())) return false; + if (TLI.getBooleanContents(N.getValueType()) == + TargetLowering::UndefinedBooleanContent) + return false; + LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(4); return true; } -// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only -// one use. If this is true, it allows the users to invert the operation for -// free when it is profitable to do so. +/// Return true if this is a SetCC-equivalent operation with only one use. +/// If this is true, it allows the users to invert the operation for free when +/// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) @@ -617,7 +667,7 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// Returns true if N is a BUILD_VECTOR node whose /// elements are all the same constant or undefined. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); @@ -638,7 +688,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if(BV && BV->isConstant()) + if (BV && BV->isConstant()) return BV; return nullptr; } @@ -664,6 +714,23 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { return nullptr; } +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// float. 
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -671,10 +738,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one @@ -692,10 +758,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return SDValue(); } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one @@ -720,11 +785,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, N->dump(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump(&DAG); - dbgs() << " and " << NumTo-1 << " other values\n"; - for (unsigned i = 0, e = NumTo; i != e; ++i) - assert((!To[i].getNode() || - N->getValueType(i) == To[i].getValueType()) && - "Cannot combine value to value of different type!")); + dbgs() << " and " << NumTo-1 << " other values\n"); + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!"); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { @@ -740,14 +806,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorklist(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); } @@ -765,22 +825,12 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. 
- if (TLO.Old.getNode()->use_empty()) { - removeFromWorklist(TLO.Old.getNode()); - - // If the operands of this node are only used by the node, they will now - // be dead. Make sure to visit them first to delete dead nodes early. - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) - if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorklist(TLO.Old.getNode()->getOperand(i).getNode()); - - DAG.DeleteNode(TLO.Old.getNode()); - } + if (TLO.Old.getNode()->use_empty()) + deleteAndRecombine(TLO.Old.getNode()); } -/// SimplifyDemandedBits - Check the specified integer node value to see if -/// it can be simplified or if things it uses can be simplified by bit -/// propagation. If so, return true. +/// Check the specified integer node value to see if it can be simplified or if +/// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; @@ -815,8 +865,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - removeFromWorklist(Load); - DAG.DeleteNode(Load); + deleteAndRecombine(Load); AddToWorklist(Trunc.getNode()); } @@ -826,8 +875,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, @@ -889,9 +938,9 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { return DAG.getZeroExtendInReg(NewOp, dl, OldVT); } -/// PromoteIntBinOp - Promote the specified integer binary operation if the -/// target indicates it is beneficial. e.g. On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. +/// Promote the specified integer binary operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -947,9 +996,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { return SDValue(); } -/// PromoteIntShiftOp - Promote the specified integer shift operation if the -/// target indicates it is beneficial. e.g. On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. +/// Promote the specified integer shift operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -1048,8 +1097,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 
ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), @@ -1064,14 +1113,42 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); AddToWorklist(Result.getNode()); return true; } return false; } +/// \brief Recursively delete a node which has no uses and any operands for +/// which it is the only use. +/// +/// Note that this both deletes the nodes and removes them from the worklist. +/// It also adds any nodes that have had a user deleted to the worklist as they +/// may now have only one use and be subject to other combines. +bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { + if (!N->use_empty()) + return false; + + SmallSetVector<SDNode *, 16> Nodes; + Nodes.insert(N); + do { + N = Nodes.pop_back_val(); + if (!N) + continue; + + if (N->use_empty()) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Nodes.insert(N->getOperand(i).getNode()); + + removeFromWorklist(N); + DAG.DeleteNode(N); + } else { + AddToWorklist(N); + } + } while (!Nodes.empty()); + return true; +} //===----------------------------------------------------------------------===// // Main DAG Combiner implementation @@ -1083,6 +1160,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; + // Early exit if this basic block is in an optnone function. + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeNone)) + return; + // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -1093,34 +1177,52 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // The root of the dag may dangle to deleted nodes until the dag combiner is - // done. Set it to null to avoid confusion. - DAG.setRoot(SDValue()); - // while the worklist isn't empty, find a node and // try and combine it. - while (!WorklistContents.empty()) { + while (!WorklistMap.empty()) { SDNode *N; - // The WorklistOrder holds the SDNodes in order, but it may contain - // duplicates. - // In order to avoid a linear scan, we use a set (O(log N)) to hold what the - // worklist *should* contain, and check the node we want to visit is should - // actually be visited. + // The Worklist holds the SDNodes in order, but it may contain null entries. do { - N = WorklistOrder.pop_back_val(); - } while (!WorklistContents.erase(N)); + N = Worklist.pop_back_val(); + } while (!N); + + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms.
- if (N->use_empty()) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorklist(N->getOperand(i).getNode()); - - DAG.DeleteNode(N); + if (recursivelyDeleteUnusedNodes(N)) continue; + + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector<SDNode *, 16> UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; } + DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to the + // worklist as well. Because the worklist uniques things already, this + // won't repeatedly process the same operand. + CombinedNodes.insert(N); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (!CombinedNodes.count(N->getOperand(i).getNode())) + AddToWorklist(N->getOperand(i).getNode()); + SDValue RV = combine(N); if (!RV.getNode()) @@ -1139,15 +1241,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - RV.getNode()->dump(&DAG); - dbgs() << '\n'); + DEBUG(dbgs() << " ... into: "; + RV.getNode()->dump(&DAG)); // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); - WorklistRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1161,23 +1259,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { AddToWorklist(RV.getNode()); AddUsersToWorklist(RV.getNode()); - // Add any uses of the old node to the worklist in case this node is the - // last one that uses them. They may become dead after this node is - // deleted. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorklist(N->getOperand(i).getNode()); - // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorklist(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); } // If the root changed (e.g. it was a dead load), update the root.
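The worklist changes above follow one pattern that is easier to see in isolation: a vector gives LIFO order, a map gives uniquing and O(1) removal by nulling the vector slot, and the pop loop skips the nulls. Below is a minimal standalone sketch of that pattern, for illustration only; Item and the standard containers stand in for SDNode and the LLVM ADTs, and the member names merely mirror the patch.

  #include <cassert>
  #include <cstddef>
  #include <unordered_map>
  #include <vector>

  struct Item;

  // A uniquing LIFO worklist: push-if-absent, remove-by-nulling, pop-skip-nulls.
  struct UniquedWorklist {
    std::vector<Item *> Order;               // stack order; may hold nulls
    std::unordered_map<Item *, size_t> Map;  // item -> index into Order

    void add(Item *N) {                      // cf. AddToWorklist
      if (Map.insert({N, Order.size()}).second)
        Order.push_back(N);                  // pushed only if not already present
    }
    void remove(Item *N) {                   // cf. removeFromWorklist
      auto It = Map.find(N);
      if (It == Map.end())
        return;
      Order[It->second] = nullptr;           // null the slot; no linear erase
      Map.erase(It);
    }
    Item *pop() {                            // cf. the pop loop in Run()
      assert(!empty() && "pop on empty worklist");
      Item *N;
      do {
        N = Order.back();
        Order.pop_back();
      } while (!N);                          // skip nulled-out entries
      bool Good = Map.erase(N) != 0;
      (void)Good;
      assert(Good && "worklist entry without a corresponding map entry");
      return N;
    }
    bool empty() const { return Map.empty(); }
  };

Every operation is amortized O(1); the cost of nulled slots is paid once, when pop() walks past them.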
@@ -1239,6 +1325,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); + case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); @@ -1250,6 +1337,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FMINNUM: return visitFMINNUM(N); + case ISD::FMAXNUM: return visitFMAXNUM(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -1263,6 +1352,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSTORE: return visitMSTORE(N); } return SDValue(); } @@ -1342,8 +1433,8 @@ SDValue DAGCombiner::combine(SDNode *N) { return RV; } -/// getInputChainForNode - Given a node, return its input chain if it has one, -/// otherwise return a null sd operand. +/// Given a node, return its input chain if it has one, otherwise return a null +/// sd operand. static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) @@ -1405,7 +1496,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { default: // Only add if it isn't already in the list. - if (SeenOps.insert(Op.getNode())) + if (SeenOps.insert(Op.getNode()).second) Ops.push_back(Op); else Changed = true; @@ -1446,33 +1537,10 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} -static -SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, - SelectionDAG &DAG) { - EVT VT = N0.getValueType(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); - - if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && - isa<ConstantSDNode>(N00.getOperand(1))) { - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, - DAG.getNode(ISD::SHL, SDLoc(N00), VT, - N00.getOperand(0), N01), - DAG.getNode(ISD::SHL, SDLoc(N01), VT, - N00.getOperand(1), N01)); - return DAG.getNode(ISD::ADD, DL, VT, N0, N1); - } - - return SDValue(); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1589,16 +1657,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } } - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); - if (Result.getNode()) return Result; - } - if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); - if (Result.getNode()) return Result; - } - // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB) @@ -1642,6 +1700,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add X, (sextinreg Y i1) -> sub X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -1807,6 +1876,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { VT); } + // sub X, (sextinreg Y i1) -> add X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -2011,9 +2091,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. - if (TLI.isPow2DivCheap()) + if (TLI.isPow2SDivCheap()) return SDValue(); + // Target-specific implementation of sdiv x, pow2. + SDValue Res = BuildSDIVPow2(N); + if (Res.getNode()) + return Res; + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register @@ -2281,10 +2366,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return SDValue(); } -/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that -/// compute two values. LoOp and HiOp give the opcodes for the two computations -/// that are being performed. Return true if a simplification was made. -/// +/// Perform optimizations common to nodes that compute two values. LoOp and HiOp +/// give the opcodes for the two computations that are being performed. Return +/// the simplified node if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half.
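The two sign_extend_inreg folds introduced above (in visitADD and visitSUB) rest on a one-line identity: sign-extending the low bit of Y yields 0 or -1, which is exactly -(Y & 1), so X + sext_i1(Y) == X - (Y & 1) and X - sext_i1(Y) == X + (Y & 1). A quick self-contained check in plain C++; sext_i1 here is a stand-in for SIGN_EXTEND_INREG with VT == i1, not an LLVM API:

  #include <cassert>
  #include <cstdint>

  // What SIGN_EXTEND_INREG(Y, i1) computes: replicate bit 0 across the word.
  static int32_t sext_i1(uint32_t Y) { return (Y & 1) ? -1 : 0; }

  int main() {
    for (uint32_t Y : {0u, 1u, 2u, 3u, 0xFFFFFFFFu})
      for (int32_t X : {-7, 0, 42}) {
        int32_t LowBit = (int32_t)(Y & 1);
        assert(X + sext_i1(Y) == X - LowBit); // add X, (sextinreg Y i1) -> sub X, (and Y 1)
        assert(X - sext_i1(Y) == X + LowBit); // sub X, (sextinreg Y i1) -> add X, (and Y 1)
      }
    return 0;
  }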
@@ -2292,8 +2376,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } @@ -2302,8 +2385,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } @@ -2313,8 +2395,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && @@ -2324,8 +2405,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && @@ -2431,8 +2511,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { return SDValue(); } -/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with -/// two operands of the same opcode, try to simplify it. +/// If this is a binary operator with two operands of the same opcode, try to +/// simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); @@ -2445,6 +2525,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine @@ -2452,6 +2533,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::BSWAP || // Avoid infinite looping with PromoteIntBinOp. 
(N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || @@ -2598,9 +2680,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0, because an undef node may exist in N0 + return DAG.getConstant( + APInt::getNullValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // do not return N1, because an undef node may exist in N1 + return DAG.getConstant( + APInt::getNullValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2708,6 +2798,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before @@ -2834,7 +2925,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2854,7 +2945,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2880,10 +2971,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT LoadedVT = LN0->getMemoryVT(); + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, @@ -2898,7 +2990,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -2918,13 +3011,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - EVT LoadResultTy = HasAnyExt ?
LN0->getValueType(0) : VT; SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment, LN0->getTBAAInfo()); + LN0->isInvariant(), Alignment, LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2971,8 +3063,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(); } -/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 -/// +/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { if (!LegalOperations) @@ -3077,10 +3168,13 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, return Res; } -/// isBSwapHWordElement - Return true if the specified node is an element -/// that makes up a 32-bit packed halfword byteswap. i.e. -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { +/// Return true if the specified node is an element that makes up a 32-bit +/// packed halfword byteswap. +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3147,8 +3241,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { return true; } -/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// Match a 32-bit packed halfword bswap. 
That is +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!LegalOperations) @@ -3160,7 +3257,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3168,6 +3264,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); + SDNode *Parts[4] = {}; if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { @@ -3241,9 +3338,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) - return N0; + // do not return N0, because an undef node may exist in N0 + return DAG.getConstant( + APInt::getAllOnesValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) - return N1; + // do not return N1, because an undef node may exist in N1 + return DAG.getConstant( + APInt::getAllOnesValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3342,12 +3447,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); - if (!COR.getNode()) - return SDValue(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), COR); + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) @@ -3435,7 +3539,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } -/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +/// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { if (isa<ConstantSDNode>(Op.getOperand(1))) { @@ -3732,7 +3836,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return RXOR; // fold !(x cc y) -> (x !cc y) - if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), isInt); @@ -3825,8 +3929,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } -/// visitShiftByConstant - Handle transforms common to the three shifts, when -/// the shift amount is a constant. +/// Handle transforms common to the three shifts, when the shift amount is a +/// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { // We can't and shouldn't fold opaque constants.
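// (Opaque constants are deliberately hidden from the folders, e.g. so that // constant hoisting can materialize an expensive constant once; folding one // here would defeat that.)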
if (Amt->isOpaque()) @@ -3963,8 +4067,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); - if (C.getNode()) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4098,6 +4201,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } + // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // Variant of version done on multiply, except mul by a power of 2 is turned + // into a shift. + APInt Val; + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isa<ConstantSDNode>(N0.getOperand(1)) || + isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); + SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + } + if (N1C) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) @@ -4498,6 +4613,43 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } + +/// \brief Generate Min/Max node +static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + ISD::CondCode CC, const TargetLowering &TLI, + SelectionDAG &DAG) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + switch (CC) { + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: { + unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: { + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + default: + return SDValue(); + } +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4577,9 +4729,31 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. 
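+ // (ISD::FMINNUM/FMAXNUM implement IEEE-754 minNum/maxNum: when exactly one + // input is NaN they return the other input, which a setcc+select pair would + // not, hence the isKnownNeverNaN checks below.)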
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + SDValue FMinMax = + combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), + N1, N2, CC, TLI, DAG); + if (FMinMax) + return FMinMax; + } + if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4613,12 +4787,17 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - MVT VT = N->getSimpleValueType(0); + EVT VT = N->getValueType(0); int NumElems = VT.getVectorNumElements(); assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR); + // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about + // binary ones here. + if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) + return SDValue(); + // We're sure we have an even number of elements due to the // concat_vectors we have as arguments to vselect. // Skip BV elements until we find one that's not an UNDEF @@ -4656,6 +4835,162 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + SDValue Data = MST->getData(); + SDLoc DL(N); + + // If the MSTORE data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() == ISD::SETCC) { + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); + + SDValue Chain = MST->getChain(); + SDValue Ptr = MST->getBasePtr(); + + EVT MemoryVT = MST->getMemoryVT(); + unsigned Alignment = MST->getOriginalAlignment(); + + // If Alignment is equal to the vector size, + // use half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MST->getAAInfo(), MST->getRanges()); + + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, MST->getAAInfo(), + MST->getRanges()); + + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + return SDValue(); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // If the MLOAD result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Chain = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + EVT MemoryVT = MLD->getMemoryVT(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // If Alignment is equal to the vector size, + // use half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one.
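+ // (Result number 1 of the masked load is its chain output.)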
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); + + SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { LoadRes, Chain }; + return DAG.getMergeValues(RetOps, DL); + } + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4765,13 +5100,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } - - // Fold to a simpler select_cc - if (SCC.getOpcode() == ISD::SETCC) + } else if (SCC->getOpcode() == ISD::UNDEF) { + // When the condition is UNDEF, just return the first operand. This is + // consistent with DAG creation; no setcc node is created in this case + return N2; + } else if (SCC.getOpcode() == ISD::SETCC) { + // Fold to a simpler select_cc return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); + } } // If we can fold this based on the true/false value, do so. @@ -5004,7 +5342,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5032,7 +5370,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5052,7 +5390,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { @@ -5131,14 +5469,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue SetCC = DAG.getSetCC(DL, - SetCCVT, + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); - EVT SelectVT = getSetCCResultType(VT); - return DAG.getSelect(DL, VT, - DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + return DAG.getSelect(DL, VT, SetCC, NegOne, DAG.getConstant(0, VT)); - } } } @@ -5298,7 +5632,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5326,7 +5660,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant
&& - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { @@ -5363,7 +5697,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5525,7 +5859,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5555,7 +5889,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -5610,9 +5944,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -/// GetDemandedBits - See if the specified operand can be simplified with the -/// knowledge that only the bits specified by Mask are used. If so, return the -/// simpler operand, otherwise return a null SDValue. +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. If so, return the simpler operand, +/// otherwise return a null SDValue. SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; @@ -5653,11 +5987,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { return SDValue(); } -/// ReduceLoadWidth - If the result of a wider load is shifted to right of N -/// bits and then truncated to a narrower type and where N is a multiple -/// of number of bits of the narrower type, transform it to a narrower load -/// from address + N / num of bits of new type. If the result is to be -/// extended, also fold the extension to form a extending load. +/// If the result of a wider load is shifted to right of N bits and then +/// truncated to a narrower type and where N is a multiple of number of bits of +/// the narrower type, transform it to a narrower load from address + N / num of +/// bits of new type. If the result is to be extended, also fold the extension +/// to form a extending load. 
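A recurring change across the extend visitors above threads the result type into TLI.isLoadExtLegal, so extending-load legality is keyed on the (result VT, memory VT) pair rather than the memory type alone. A minimal sketch of why the pair matters, using toy value types and a hypothetical legality table rather than LLVM's real API:

#include <cstdio>
#include <map>
#include <utility>

enum ToyVT { i8, i16, i32, i64 };

// Hypothetical target: sign-extending loads to i32 are legal from i8 and
// i16, but a sign-extending load to i64 is only legal from i32.
static const std::map<std::pair<ToyVT, ToyVT>, bool> SExtLoadLegal = {
    {{i32, i8}, true}, {{i32, i16}, true}, {{i64, i32}, true}};

static bool isLoadExtLegal(ToyVT ResVT, ToyVT MemVT) {
  return SExtLoadLegal.count({ResVT, MemVT}) != 0;
}

int main() {
  // Keyed on the memory type alone, an i8 source would look legal for
  // every result width; with the pair, i64 <- i8 is correctly rejected.
  std::printf("sextload i32 <- i8: %d\n", isLoadExtLegal(i32, i8)); // 1
  std::printf("sextload i64 <- i8: %d\n", isLoadExtLegal(i64, i8)); // 0
  return 0;
}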
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); @@ -5684,7 +6018,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5763,6 +6097,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) return SDValue(); + if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5789,12 +6126,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. WorklistRemover DeadNodes(*this); @@ -5888,7 +6225,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -5904,7 +6241,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6152,7 +6489,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { return Elt.getOperand(Elt.getResNo()).getNode(); } -/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// build_pair (load, load) -> load /// if load locations are consecutive. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); @@ -6218,7 +6555,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Ideally this won't happen very often, because instcombine // and the earlier dagcombine runs (where illegal nodes are // permitted) should have folded most of them already. 
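ReduceLoadWidth, now also guarded by the shouldReduceLoadWidth target hook added above, relies on the identity in the reflowed comment: a wide load shifted right by N bits and then truncated equals a narrow load at address + N/8. A self-contained demo of that arithmetic on a little-endian host (the DAG code adjusts the offset for big-endian targets):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  unsigned char Mem[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};

  // Wide form: load i64, srl by 16, truncate to i16.
  uint64_t Wide;
  std::memcpy(&Wide, Mem, 8);
  uint16_t A = (uint16_t)(Wide >> 16);

  // Narrow form: load i16 directly at offset 16 / 8 == 2.
  uint16_t B;
  std::memcpy(&B, Mem + 2, 2);

  std::printf("0x%04x 0x%04x equal=%d\n", A, B, A == B); // equal on LE
  return 0;
}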
- DAG.DeleteNode(Res.getNode()); + deleteAndRecombine(Res.getNode()); } } @@ -6247,12 +6584,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), OrigAlign, - LN0->getTBAAInfo()); - AddToWorklist(N); - CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, SDLoc(N0), - N0.getValueType(), Load), - Load.getValue(1)); + LN0->getAAInfo()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } @@ -6337,9 +6670,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector -/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the -/// destination element value type. +/// We know that BV is a build_vector node with Constant, ConstantFP or Undef +/// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); @@ -6383,7 +6715,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (SrcEltVT.isFloatingPoint()) { // Convert the input float vector to a int vector where the elements are the // same sizes. - assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; @@ -6392,7 +6723,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Now we know the input is an integer vector. If the output is a FP type, // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { - assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); @@ -6475,6 +6805,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6485,195 +6816,197 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); - // fold (fadd A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N1CFP->getValueAPF().isZero()) - return N0; + // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); - // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if 
(DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); - - // No FP constant should be created after legalization as Instruction - // Selection pass has hard time in dealing with FP constant. - // - // We don't need test this condition for transformation like following, as - // the DAG being transformed implies it is legal to take FP constant as - // operand. - // - // (fadd (fmul c, x), x) -> (fmul c+1, x) - // - bool AllowNewFpConst = (Level < AfterLegalizeDAG); - - // If allow, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); - // If allow, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); - - // In unsafe math mode, we can fold chains of FADD's of the same value - // into multiplications. This transform is not safe in general because - // we are reducing the number of rounding steps. - if (DAG.getTarget().Options.UnsafeFPMath && - TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && - !N0CFP && !N1CFP) { - if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - - // (fadd (fmul c, x), x) -> (fmul x, c+1) - if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; - // (fadd (fmul x, c), x) -> (fmul x, c+1) - if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, + N0.getOperand(1), N1)); + + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, VT); + + // We can fold chains of FADD's of the same value into multiplications. + // This transform is not safe in general because we are reducing the number + // of rounding steps. 
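The consolidated Options.UnsafeFPMath block above gathers folds such as (fadd (fmul x, c), x) -> (fmul x, c+1); they are unsafe precisely because, as the comment says, they reduce the number of rounding steps. A quick host check with ordinary IEEE doubles:

#include <cstdio>

int main() {
  double x = 0.1, c = 3.0;
  double before = x * c + x;     // (fadd (fmul x, c), x): rounds twice
  double after  = x * (c + 1.0); // (fmul x, c+1):         rounds once
  std::printf("before = %.17g\nafter  = %.17g\nequal = %d\n",
              before, after, before == after);
  // The two are close but not guaranteed bit-identical, which is why the
  // combine is only done under unsafe FP math.
  return 0;
}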
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul x, c), x) -> (fmul x, c+1) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) - if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), NewCFP); + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0.getOperand(0), NewCFP); + } } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) - if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); - } - } + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + // (fadd x, (fmul x, c)) -> (fmul x, c+1) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + } - // (fadd x, (fmul c, x)) -> (fmul x, c+1) - if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + } } - // (fadd x, (fmul x, c)) -> (fmul x, c+1) - if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul x, 3.0) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1, DAG.getConstantFP(3.0, VT)); } - - 
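Unlike the c+1 folds, the (fadd (fadd x, x), x) -> (fmul x, 3.0) collapse just above (and the 4.0 variant in the next hunk) is exact in IEEE arithmetic: x + x scales by two without rounding, so both forms round the single exact value 3x once. It still sits inside the unsafe-math block, behind AllowNewConst, because it materializes a fresh FP constant after legalization. A host check, assuming ordinary IEEE doubles:

#include <cstdio>

int main() {
  double x = 0.1;
  bool mismatch = false;
  for (int i = 0; i < 800; ++i, x *= 1.37)
    if ((x + x) + x != x * 3.0)
      mismatch = true;
  std::printf("mismatch: %s\n", mismatch ? "yes" : "no"); // expect "no"
  return 0;
}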
// (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) - if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(1) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(1), NewCFP); + if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0, DAG.getConstantFP(3.0, VT)); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) - if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) + if (AllowNewConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(0), NewCFP); - } - } - - if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - - if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul x, 3.0) - if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N0.getOperand(0), DAG.getConstantFP(4.0, VT)); } - - // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) - if (AllowNewFpConst && - N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getConstantFP(4.0, VT)); - } + } // enable-unsafe-fp-math // FADD -> FMA combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
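The FADD -> FMA combines, now also taken for multi-use multiplies when TLI.enableAggressiveFMAFusion(VT) says so, are gated on FPOpFusion::Fast or unsafe math because an fma rounds once where fmul followed by fadd rounds twice. A host demo via the C library fma (assuming the usual IEEE doubles):

#include <cmath>
#include <cstdio>

int main() {
  double e = std::ldexp(1.0, -27);  // 2^-27
  double x = 1.0 + e, y = 1.0 + e, z = -1.0;
  double separate = x * y + z;      // (fadd (fmul x, y), z): two roundings
  double fused = std::fma(x, y, z); // (fma x, y, z): one rounding
  std::printf("separate = %a\nfused    = %a\n", separate, fused);
  // fused keeps the low product bits (2^-26 + 2^-54); separate loses them
  // when the double-width product is rounded before the add.
  return 0;
}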
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } } return SDValue(); @@ -6682,10 +7015,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6696,60 +7030,60 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); - // fold (fsub A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N0; - // fold (fsub 0, B) -> -B - if (DAG.getTarget().Options.UnsafeFPMath && - N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return GetNegatedExpression(N1, DAG, LegalOperations); - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); - } + // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); - // If 'unsafe math' is enabled, fold - // (fsub x, x) 
-> 0.0 & - // (fsub x, (fadd x, y)) -> (fneg y) & - // (fsub x, (fadd y, x)) -> (fneg y) - if (DAG.getTarget().Options.UnsafeFPMath) { + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, dl, VT, N1); + } + + // (fsub x, x) -> 0.0 if (N0 == N1) return DAG.getConstantFP(0.0f, VT); + // (fsub x, (fadd x, y)) -> (fneg y) + // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); - if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } // FSUB -> FMA combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), @@ -6758,13 +7092,115 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || + TLI.enableAggressiveFMAFusion(VT))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N00), N01, DAG.getNode(ISD::FNEG, dl, VT, N1)); } + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. 
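The unsafe-math FSUB folds above ((fsub A, 0) -> A and (fsub x, x) -> 0.0) are gated for concrete IEEE reasons: isZero() matches -0.0 as well as +0.0, and -0.0 - (-0.0) is +0.0, so dropping the subtraction can flip a zero's sign; likewise x - x is NaN, not zero, for infinite or NaN x. A quick host check of those corner cases (plain C++, not DAG code):

#include <cmath>
#include <cstdio>

int main() {
  double a = -0.0, c = -0.0;  // c would match isZero()
  std::printf("a - c: signbit %d, folded A: signbit %d\n",
              std::signbit(a - c), std::signbit(a)); // +0.0 vs -0.0
  double x = INFINITY;
  std::printf("x - x = %g, folded constant = %g\n", x - x, 0.0); // nan vs 0
  return 0;
}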
+ if (TLI.isFPExtFree(VT)) { + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + } + + // More folding opportunities when target permits. 
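The isFPExtFree(VT) arrangement above (visitFADD gained the analogous block earlier) converts e.g. (fsub (fpext (fmul x, y)), z) into (fma (fpext x), (fpext y), (fneg z)). Besides assuming free extends, this moves the multiply's rounding from the narrow type into the wide fma, so results can change in the low bits. A float-to-double host illustration:

#include <cmath>
#include <cstdio>

int main() {
  float e = std::ldexp(1.0f, -12);  // 2^-12
  float x = 1.0f + e, y = 1.0f + e;
  double z = 1.0;
  double before = (double)(x * y) - z;                // fmul in float, fpext
  double after = std::fma((double)x, (double)y, -z);  // combined form
  std::printf("before = %a\nafter  = %a\n", before, after);
  // The float product rounds away the 2^-24 term that the double-width
  // fma preserves.
  return 0;
}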
+ if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); @@ -6773,47 +7209,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { + // This just handles C1 * C2 for vectors. Other vector folds are below. SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (FoldedVOp.getNode()) + return FoldedVOp; + // Canonicalize vector constant to RHS. + if (N0.getOpcode() == ISD::BUILD_VECTOR && + N1.getOpcode() != ISD::BUILD_VECTOR) + if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) + if (BV0->isConstant()) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); - // fold (fmul A, 0) -> 0 - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N1; - // fold (fmul A, 0) -> 0, vector edition. - if (DAG.getTarget().Options.UnsafeFPMath && - ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) return N0; + + if (Options.UnsafeFPMath) { + // fold (fmul A, 0) -> 0 + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + + // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (N0.getOpcode() == ISD::FMUL) { + // Fold scalars or any vector constants (not just splats). + // This fold is done in general by InstCombine, but extra fmul insts + // may have been generated during lowering. 
+ SDValue N01 = N0.getOperand(1); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + } + } + + // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) + // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs + // during an early run of DAGCombiner can prevent folding with fmuls + // inserted during lowering. + if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + SDLoc SL(N); + const SDValue Two = DAG.getConstantFP(2.0, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + } + } + // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -6823,14 +7294,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } - // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N0.getOpcode() == ISD::FMUL && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), N1)); - return SDValue(); } @@ -6842,8 +7305,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; - if (DAG.getTarget().Options.UnsafeFPMath) { + // Constant fold FMA. 
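visitFMUL draws the safe/unsafe line the same way: fmul X, 2.0 -> fadd X, X is exact and stays outside the Options.UnsafeFPMath block, while the (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) reassociation above moved inside it. A host comparison of the two:

#include <cstdio>

int main() {
  double x = 0.7;
  std::printf("x*2.0 == x+x: %d\n", x * 2.0 == x + x); // always 1: exact
  double c1 = 0.1, c2 = 0.3;
  std::printf("(x*c1)*c2 == x*(c1*c2): %d\n",
              (x * c1) * c2 == x * (c1 * c2)); // not guaranteed: rounding
  return 0;
}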
+ if (isa<ConstantFPSDNode>(N0) && + isa<ConstantFPSDNode>(N1) && + isa<ConstantFPSDNode>(N2)) { + return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + } + + if (Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -6859,7 +7330,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && N2.getOperand(1).getOpcode() == ISD::ConstantFP) { @@ -6869,7 +7340,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (DAG.getTarget().Options.UnsafeFPMath && + if (Options.UnsafeFPMath && N0.getOpcode() == ISD::FMUL && N1CFP && N0.getOperand(1).getOpcode() == ISD::ConstantFP) { return DAG.getNode(ISD::FMA, dl, VT, @@ -6893,13 +7364,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, @@ -6915,7 +7386,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6927,30 +7399,79 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { - // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). - if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + if (Options.UnsafeFPMath) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). 
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, + DAG.getConstantFP(Recip, VT)); + } + + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (N1.getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FMUL) { + // Look through an FMUL. Even though this won't remove the FDIV directly, + // it's still worthwhile to get rid of the FSQRT if possible. + SDValue SqrtOp; + SDValue OtherOp; + if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(0); + OtherOp = N1.getOperand(1); + } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(1); + OtherOp = N1.getOperand(0); + } + if (SqrtOp.getNode()) { + // We found a FSQRT, so try to make this fold: + // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } + } + + // Fold into a reciprocal estimate and multiply instead of a real divide. + if (SDValue RV = BuildReciprocalEstimate(N1)) { + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -6960,6 +7481,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } + // Combine multiple FDIVs with the same divisor into multiple FMULs by the + // reciprocal. + // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) + // Notice that this is not always beneficial. One reason is different target + // may have different costs for FDIV and FMUL, so sometimes the cost of two + // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason + // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". + if (Options.UnsafeFPMath) { + // Skip if current node is a reciprocal. 
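BuildRsqrtEstimate and BuildReciprocalEstimate, used throughout this block to turn fdiv and fdiv-by-sqrt into multiplies, expand on opting-in targets to a hardware estimate refined by Newton-Raphson steps. A host-side sketch of the refinement math only (the seed values are arbitrary stand-ins for a target's estimate instruction):

#include <cstdio>

int main() {
  double d = 3.0;

  // Reciprocal: e' = e * (2 - d*e) converges quadratically to 1/d.
  double e = 0.3;
  for (int i = 0; i < 4; ++i)
    e = e * (2.0 - d * e);
  std::printf("1/3       ~= %.17g\n", e);

  // Reciprocal square root: r' = r * (1.5 - 0.5*d*r*r) -> 1/sqrt(d).
  double r = 0.5;
  for (int i = 0; i < 5; ++i)
    r = r * (1.5 - 0.5 * d * r * r);
  std::printf("1/sqrt(3) ~= %.17g\n", r);
  return 0;
}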
+ if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + SmallVector<SDNode *, 4> Users; + // Find all FDIV users of the same divisor. + for (SDNode::use_iterator UI = N1.getNode()->use_begin(), + UE = N1.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = UI.getUse().getUser(); + if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) + Users.push_back(User); + } + + if (TLI.combineRepeatedFPDivisors(Users.size())) { + SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 + SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { + if ((*I)->getOperand(0) != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, + (*I)->getOperand(0), Reciprocal); + DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + } + } + return SDValue(); + } + } + return SDValue(); } @@ -6977,6 +7536,32 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFSQRT(SDNode *N) { + if (DAG.getTarget().Options.UnsafeFPMath && + !TLI.isFsqrtCheap()) { + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { + EVT VT = RV.getValueType(); + RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); + + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue ZeroCmp = + DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, + SDLoc(N), VT, ZeroCmp, Zero, RV); + return RV; + } + } + return SDValue(); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7222,7 +7807,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -7239,6 +7824,43 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP) + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP) + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + + return SDValue(); +} + +// FIXME: FNEG and FABS have a lot in common; refactor. 
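The new visitFSQRT computes sqrt(X) as X * rsqrt(X), and the select against zero exists because that product is 0 * inf, i.e. NaN, at X == 0. The corner case on a host:

#include <cmath>
#include <cstdio>

int main() {
  double x = 0.0;
  double rsqrt = 1.0 / std::sqrt(x);     // +inf at x == 0
  double raw = x * rsqrt;                // 0 * inf == NaN
  double fixed = (x == 0.0) ? 0.0 : raw; // the SELECT the combine inserts
  std::printf("raw = %g, fixed = %g\n", raw, fixed);
  return 0;
}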
SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7248,26 +7870,36 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } + // Constant fold FNEG. + if (isa<ConstantFPSDNode>(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); - // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. - // TODO: We can also optimize for vectors here, but we need to make sure - // that the sign mask is created properly for each vector element. - if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && - !VT.isVector() && - N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger()) { + if (!TLI.isFNegFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x80... per scalar element + // and splat it. + SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x80... + SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + DAG.getConstant(SignMask, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - VT, Int); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -7289,45 +7921,50 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFCEIL(SDNode *N) { +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); - - // fold (fceil c1) -> fceil(c1) - if (N0CFP) - return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); - - return SDValue(); -} + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); -SDValue DAGCombiner::visitFTRUNC(SDNode *N) { - SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + } - // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) - return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. 
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } -SDValue DAGCombiner::visitFFLOOR(SDNode *N) { +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) - return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + } + + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); if (VT.isVector()) { @@ -7336,32 +7973,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } // fold (fabs c1) -> fabs(c1) - if (N0CFP) + if (isa<ConstantFPSDNode>(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading // constant pool values. - // TODO: We can also optimize for vectors here, but we need to make sure - // that the sign mask is created properly for each vector element. if (!TLI.isFAbsFree(VT) && - N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !VT.isVector()) { + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. + SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x7f... + SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + DAG.getConstant(SignMask, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - N->getValueType(0), Int); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -7441,15 +8086,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // will convert it back to (X & C1) >> C2. CombineTo(N, NewBRCond, false); // Truncate is dead. - if (Trunc) { - removeFromWorklist(Trunc); - DAG.DeleteNode(Trunc); - } + if (Trunc) + deleteAndRecombine(Trunc); // Replace the uses of SRL with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorklist(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
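The visitFNEG/visitFABS rewrites above generalize the bitcast trick to vectors by splatting a per-element sign mask with APInt::getSplat: fneg becomes an integer xor with the sign bit, fabs an and with its complement, avoiding a constant-pool FP load. A scalar host model of both masks:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double v = -1.5;
  uint64_t bits, sign = 1ull << 63;
  std::memcpy(&bits, &v, sizeof v);

  uint64_t negBits = bits ^ sign;  // fneg: flip the IEEE sign bit
  uint64_t absBits = bits & ~sign; // fabs: clear the IEEE sign bit

  double neg, abs;
  std::memcpy(&neg, &negBits, sizeof neg);
  std::memcpy(&abs, &absBits, sizeof abs);
  std::printf("fneg(%g) = %g, fabs(%g) = %g\n", v, neg, v, abs);
  return 0;
}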
} } @@ -7478,8 +8120,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorklist(TheXor); - DAG.DeleteNode(TheXor); + deleteAndRecombine(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -7509,8 +8150,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // Replace the uses of XOR with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorklist(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } @@ -7547,9 +8187,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } -/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that -/// uses N as its base pointer and that N may be folded in the load / store -/// addressing mode. +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -7588,12 +8227,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); } -/// CombineToPreIndexedLoadStore - Try turning a load / store into a -/// pre-indexed load / store when the base pointer is an add or subtract -/// and it has other uses besides the load / store. After the -/// transformation, the new indexed load / store has effectively folded -/// the add / subtract in and all of its other uses are redirected to the -/// new load / store. +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7754,7 +8392,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); + deleteAndRecombine(N); if (Swapped) std::swap(BasePtr, Offset); @@ -7804,23 +8442,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); - removeFromWorklist(OtherUses[i]); - DAG.DeleteNode(OtherUses[i]); + deleteAndRecombine(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); - removeFromWorklist(Ptr.getNode()); - DAG.DeleteNode(Ptr.getNode()); + deleteAndRecombine(Ptr.getNode()); return true; } -/// CombineToPostIndexedLoadStore - Try to combine a load / store with a -/// add / sub of the base pointer node into a post-indexed load / store. -/// The transformation folded the add / subtract into the new indexed -/// load / store effectively and all of its uses are redirected to the -/// new load / store. +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. 
The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7924,13 +8559,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); + deleteAndRecombine(N); // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), Result.getValue(isLoad ? 1 : 0)); - removeFromWorklist(Op); - DAG.DeleteNode(Op); + deleteAndRecombine(Op); return true; } } @@ -7939,6 +8573,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + + // Some backends use TargetConstants for load offsets, but don't expect + // TargetConstants in general ADD nodes. We can convert these constants into + // regular Constants (if the constant is not opaque). + assert((Inc.getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(Inc)->isOpaque()) && + "Cannot split out indexing using opaque target constants"); + if (Inc.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + ConstInc->getValueType(0)); + } + + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -7965,18 +8623,33 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); - if (N->use_empty()) { - removeFromWorklist(N); - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + + // If this load has an opaque TargetConstant offset, then we cannot split + // the indexing into an add/sub directly (that TargetConstant may not be + // valid for a different type of node, and we cannot convert an opaque + // target constant into a regular constant). + bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); + + if (!N->hasAnyUseOfValue(0) && + ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. 
+ AddUsersToWorklist(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7984,11 +8657,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8016,15 +8687,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align, - LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), Align, LD->getAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -8354,7 +9025,7 @@ struct LoadedSlice { // At this point, we know that we perform a cross-register-bank copy. // Check if it is expensive. - const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); // Assume bitcasts are cheap, unless both register classes do not // explicitly share a common sub class. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) @@ -8618,9 +9289,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return true; } -/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the -/// load is having specific bytes cleared out. If so, return the byte size -/// being masked out and the shift amount. +/// Check to see if V is (and load (ptr), imm), where the load is having +/// specific bytes cleared out. If so, return the byte size being masked out +/// and the shift amount. static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair<unsigned, unsigned> Result(0, 0); @@ -8693,9 +9364,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } -/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that -/// provides a value as specified by MaskInfo. If so, replace the specified -/// store with a narrower store of truncated IVal. +/// Check to see if IVal is something that provides a value as specified by +/// MaskInfo. If so, replace the specified store with a narrower store of +/// truncated IVal. static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, @@ -8750,10 +9421,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, } -/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is -/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some -/// of the loaded bits, try narrowing the load and store if it would end up -/// being a win for performance or code size. +/// Look for sequence of load / op / store where op is one of 'or', 'xor', and +/// 'and' of immediates. 
If 'op' is only touching some of the loaded bits, try +/// narrowing the load and store if it would end up being a win for performance +/// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); if (ST->isVolatile()) @@ -8853,7 +9524,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), NewAlign, - LD->getTBAAInfo()); + LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), @@ -8874,10 +9545,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } -/// TransformFPLoadStorePair - For a given floating point load / store pair, -/// if the load value isn't used by any other operations, then consider -/// transforming the pair to integer load / store operations if the target -/// deems the transformation profitable. +/// For a given floating point load / store pair, if the load value isn't used +/// by any other operations, then consider transforming the pair to integer +/// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -9051,7 +9721,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 1); + SDValue Chain = SDValue(St, 0); if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; @@ -9082,7 +9752,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StoreSDNode *Index = St; while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 1)->hasOneUse()) + if (Index != St && !SDValue(Index, 0)->hasOneUse()) break; // Find the base pointer and offset for this memory node. @@ -9279,7 +9949,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); } else { - assert(false && "Invalid constant element type"); + llvm_unreachable("Invalid constant element type"); } } @@ -9313,8 +9983,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Since we know that St is redundant, just iterate. while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - removeFromWorklist(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9373,6 +10042,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (LoadNodes.size() < 2) return false; + // If we have load/store pair instructions and we only have two values, + // don't bother. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + St->getAlignment() >= RequiredAlignment) + return false; + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. 
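A minimal standalone sketch of the consecutiveness scan described above (invented types, not LLVM's real base/offset analysis): starting from candidate memory operations ordered by address, count how many form one base+offset run and are therefore mergeable.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for the (base pointer, offset) pair the real
    // scan derives from each memory node.
    struct MemAddr { const void *Base; int64_t Offset; };

    // Returns the number of leading operations that form one consecutive
    // run, i.e. same base with offsets advancing by exactly ElementBytes.
    static std::size_t consecutiveRun(const std::vector<MemAddr> &Ops,
                                      int64_t ElementBytes) {
      std::size_t i = 1;
      for (; i < Ops.size(); ++i)
        if (Ops[i].Base != Ops[0].Base ||
            Ops[i].Offset != Ops[i - 1].Offset + ElementBytes)
          break;
      return i; // Ops[0..i) can be merged into a single wide access.
    }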
@@ -9408,9 +10084,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) LastLegalIntegerType = i+1; } } @@ -9488,8 +10164,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - removeFromWorklist(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9515,7 +10190,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), OrigAlign, - ST->getTBAAInfo()); + ST->getAAInfo()); } // Turn 'store undef, Ptr' -> nothing. @@ -9569,19 +10244,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment(), TBAAInfo); + ST->getAlignment(), AAInfo); Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -9598,7 +10273,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getTBAAInfo()); + ST->getAAInfo()); } } @@ -9608,8 +10283,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -9686,6 +10361,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // If this is a store followed by a store with the same value to the same + // location, then the store is dead/noop. + if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { + if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && + ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && + ST1->isUnindexed() && !ST1->isVolatile()) { + // The store is dead, remove it. + return Chain; + } + } + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. 
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) @@ -9791,6 +10477,87 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + if (TLI.isBigEndian()) + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getNode( + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + if (TLI.isBigEndian()) + Offset = DAG.getNode( + ISD::SUB, SDLoc(EVE), EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad( + ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, + VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorklistRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. 
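The constant-index path in ReplaceExtractVectorEltOfLoadWithNarrowedLoad above computes the byte offset of the extracted lane and, on big-endian targets, mirrors it within the vector. A standalone sketch of that arithmetic (illustrative helper, not LLVM's API):

    #include <cstdint>

    // Byte offset of lane Elt for EltBits-bit elements in a vector of
    // VecBits total bits, mirroring the computation in the hunk above.
    static uint64_t extractLaneByteOffset(uint64_t EltBits, uint64_t VecBits,
                                          uint64_t Elt, bool BigEndian) {
      uint64_t Off = EltBits * Elt / 8;
      if (BigEndian)
        Off = VecBits / 8 - Off; // flip for big-endian lane numbering
      return Off;
    }
    // e.g. lane 2 of a v4i32 on a little-endian target sits
    // 32 * 2 / 8 == 8 bytes past the load's base pointer.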
+ AddToWorklist(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9859,6 +10626,39 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode()) && + !N->getOperand(1)->hasPredecessor(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9869,30 +10669,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - bool NewLoad = false; - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - NewLoad = true; - } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -9935,6 +10711,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -9947,72 +10724,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. 
- unsigned NewAlign = - TLI.getDataLayout() - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) - return SDValue(); - - Align = NewAlign; - } - - SDValue NewPtr = LN0->getBasePtr(); - unsigned PtrOff = 0; - - if (Elt) { - PtrOff = LVT.getSizeInBits() * Elt / 8; - EVT PtrType = NewPtr.getValueType(); - if (TLI.isBigEndian()) - PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, - DAG.getConstant(PtrOff, PtrType)); - } - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (NVT.bitsGT(LVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) - ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), - NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(), - Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); - } - WorklistRemover DeadNodes(*this); - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explcitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorklist(Load.getNode()); - AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(N); - return SDValue(N, 0); + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); @@ -10215,32 +10927,46 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT)) + return SDValue(); + // May only combine to shuffle after legalize if shuffle is legal. - if (LegalOperations && - !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); SDValue VecIn1, VecIn2; + bool UsesZeroVector = false; for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Op.getOpcode() == ISD::UNDEF) continue; + + // See if we can combine this build_vector into a blend with a zero vector. 
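For the blend-with-zero path introduced in this hunk, zero operands select their lane from a zero vector placed in the second shuffle input. Assuming, for illustration only, that every non-zero operand extracts lane i of the same source vector, the combined mask comes out as below (hypothetical helper, not LLVM's API):

    #include <vector>

    // build_vector(e0, 0, e2, 0) with e[i] = extractelt(V, i)
    //   -> vector_shuffle(V, zero_vector, <0, 5, 2, 7>)
    // Zero operands pick lane NumInScalars + i, as in the hunk above.
    static std::vector<int> blendWithZeroMask(const std::vector<bool> &IsZero) {
      const int N = static_cast<int>(IsZero.size());
      std::vector<int> Mask;
      for (int i = 0; i != N; ++i)
        Mask.push_back(IsZero[i] ? N + i : i);
      return Mask;
    }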
+ if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + (Op.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + UsesZeroVector = true; + continue; + } // If this input is something other than a EXTRACT_VECTOR_ELT with a // constant index, bail out. - if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) { VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } // We allow up to two distinct input vectors. - SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + SDValue ExtractedFromVec = Op.getOperand(0); if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode()) { + } else if (!VecIn2.getNode() && !UsesZeroVector) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. @@ -10251,55 +10977,93 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { + unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + unsigned Opcode = N->getOperand(i).getOpcode(); + if (Opcode == ISD::UNDEF) { Mask.push_back(-1); continue; } + // Operands can also be zero. + if (Opcode != ISD::EXTRACT_VECTOR_ELT) { + assert(UsesZeroVector && + (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && + "Unexpected node found!"); + Mask.push_back(NumInScalars+i); + continue; + } + // If extracting from the first vector, just use the index directly. SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (Extract.getOperand(0) == VecIn1) { - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (ExtIndex > VT.getVectorNumElements()) - return SDValue(); - Mask.push_back(ExtIndex); continue; } - // Otherwise, use InIdx + VecSize - unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - Mask.push_back(Idx+NumInScalars); + // Otherwise, use InIdx + InputVecSize + Mask.push_back(InNumElements + ExtIndex); } + // Avoid introducing illegal shuffles with zero. + if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + return SDValue(); + // We can't generate a shuffle node with mismatched input and output types. // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { - // We don't support shuffeling between TWO values of different types. - if (VecIn2.getNode()) + // If the input vector type has a different base type to the output + // vector type, bail out. + EVT VTElemType = VT.getVectorElementType(); + if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || + (VecIn2.getNode() && + (VecIn2.getValueType().getVectorElementType() != VTElemType))) return SDValue(); + // If the input vector is too small, widen it. // We only support widening of vectors which are half the size of the // output registers. For example XMM->YMM widening on X86 with AVX. 
- if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) - return SDValue(); + EVT VecInT = VecIn1.getValueType(); + if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { + // If we only have one small input, widen it by adding undef values. + if (!VecIn2.getNode()) + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, + DAG.getUNDEF(VecIn1.getValueType())); + else if (VecIn1.getValueType() == VecIn2.getValueType()) { + // If we have two small inputs of the same type, try to concat them. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); + VecIn2 = SDValue(nullptr, 0); + } else + return SDValue(); + } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { + // If the input vector is too large, try to split it. + // We don't support having two input vectors that are too large. + if (VecIn2.getNode()) + return SDValue(); - // If the input vector type has a different base type to the output - // vector type, bail out. - if (VecIn1.getValueType().getVectorElementType() != - VT.getVectorElementType()) + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) + return SDValue(); + + // Try to replace VecIn1 with two extract_subvectors + // No need to update the masks, they should still be correct. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, TLI.getVectorIdxTy())); + UsesZeroVector = false; + } else return SDValue(); - - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + if (UsesZeroVector) + VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : + DAG.getConstantFP(0.0, VT); + else + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); // Check that we were able to transform all incoming values to the same // type. @@ -10307,10 +11071,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { VecIn1.getValueType() != VT) return SDValue(); - // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. - if (!isTypeLegal(VT)) - return SDValue(); - // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; @@ -10501,6 +11261,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, + SDValue V, SelectionDAG &DAG) { + SDLoc DL(V); + EVT VT = V.getValueType(); + + switch (V.getOpcode()) { + default: + return V; + + case ISD::CONCAT_VECTORS: { + EVT OpVT = V->getOperand(0).getValueType(); + int OpSize = OpVT.getVectorNumElements(); + SmallBitVector OpUsedElements(OpSize, false); + bool FoundSimplification = false; + SmallVector<SDValue, 4> NewOps; + NewOps.reserve(V->getNumOperands()); + for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { + SDValue Op = V->getOperand(i); + bool OpUsed = false; + for (int j = 0; j < OpSize; ++j) + if (UsedElements[i * OpSize + j]) { + OpUsedElements[j] = true; + OpUsed = true; + } + NewOps.push_back( + OpUsed ? 
simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) + : DAG.getUNDEF(OpVT)); + FoundSimplification |= Op == NewOps.back(); + OpUsedElements.reset(); + } + if (FoundSimplification) + V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); + return V; + } + + case ISD::INSERT_SUBVECTOR: { + SDValue BaseV = V->getOperand(0); + SDValue SubV = V->getOperand(1); + auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); + if (!IdxN) + return V; + + int SubSize = SubV.getValueType().getVectorNumElements(); + int Idx = IdxN->getZExtValue(); + bool SubVectorUsed = false; + SmallBitVector SubUsedElements(SubSize, false); + for (int i = 0; i < SubSize; ++i) + if (UsedElements[i + Idx]) { + SubVectorUsed = true; + SubUsedElements[i] = true; + UsedElements[i + Idx] = false; + } + + // Now recurse on both the base and sub vectors. + SDValue SimplifiedSubV = + SubVectorUsed + ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) + : DAG.getUNDEF(SubV.getValueType()); + SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); + if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); + return V; + } + } +} + +static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + int NumElts = VT.getVectorNumElements(); + SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); + for (int M : SVN->getMask()) + if (M >= 0 && M < NumElts) + N0UsedElements[M] = true; + else if (M >= NumElts) + N1UsedElements[M - NumElts] = true; + + SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); + SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); + if (S0 == N0 && S1 == N1) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -10653,6 +11499,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // There are various patterns used to build up a vector from smaller vectors, + // subvectors, or elements. Scan chains of these and replace unused insertions + // or components with undef. + if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) + return S; + if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.getOpcode() == ISD::UNDEF || @@ -10664,99 +11516,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } - // If this shuffle node is simply a swizzle of another shuffle node, - // then try to simplify it. - if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() == ISD::UNDEF) { - - ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(OtherSV->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SmallVector<int, 4> Mask; - // Compute the combined shuffle mask. - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - assert(Idx < (int)NumElts && "Index references undef operand"); - // Next, this index comes from the first value, which is the incoming - // shuffle. Adopt the incoming index. 
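Both the swizzle combine being removed here and its generalized replacement further down compose two shuffle masks: an outer lane that refers to the inner shuffle's result adopts the inner mask's entry. A standalone sketch for the simple case the old code handled, where the outer shuffle's second operand is undef so every defined outer index points into the inner shuffle (invented helper, not LLVM's API):

    #include <vector>

    // NewMask[i] = Inner[Outer[i]]; undef lanes (-1) propagate unchanged.
    static std::vector<int> composeMasks(const std::vector<int> &Outer,
                                         const std::vector<int> &Inner) {
      std::vector<int> Mask;
      for (int Idx : Outer)
        Mask.push_back(Idx < 0 ? -1 : Inner[Idx]);
      return Mask;
    }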
- if (Idx >= 0) - Idx = OtherSV->getMaskElt(Idx); - Mask.push_back(Idx); - } - - bool CommuteOperands = false; - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { - // To be valid, the combine shuffle mask should only reference elements - // from one of the two vectors in input to the inner shufflevector. - bool IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // See if the combined mask only reference undefs or elements coming - // from the first shufflevector operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; - - if (!IsValidMask) { - IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // Check that all the elements come from the second shuffle operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; - CommuteOperands = IsValidMask; - } - - // Early exit if the combined shuffle mask is not valid. - if (!IsValidMask) - return SDValue(); - } - - // See if this pair of shuffles can be safely folded according to either - // of the following rules: - // shuffle(shuffle(x, y), undef) -> x - // shuffle(shuffle(x, undef), undef) -> x - // shuffle(shuffle(x, y), undef) -> y - bool IsIdentityMask = true; - unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; - for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { - // Skip Undefs. - if (Mask[i] < 0) - continue; - - // The combined shuffle must map each index to itself. - IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; - } - - if (IsIdentityMask) { - if (CommuteOperands) - // optimize shuffle(shuffle(x, y), undef) -> y. - return OtherSV->getOperand(1); - - // optimize shuffle(shuffle(x, undef), undef) -> x - // optimize shuffle(shuffle(x, y), undef) -> x - return OtherSV->getOperand(0); - } - - // It may still be beneficial to combine the two shuffles if the - // resulting shuffle is legal. - if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) { - if (!CommuteOperands) - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, - &Mask[0]); - - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), - &Mask[0]); - } - } - // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) - if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { // The incoming shuffle must be of the same type as the result of the @@ -10775,13 +11539,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Try to fold according to rules: - // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. 
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -10789,14 +11552,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); - SDValue SV0 = OtherSV->getOperand(0); - SDValue SV1 = OtherSV->getOperand(1); - bool HasSameOp0 = N1 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; - if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) - // Early exit. - return SDValue(); - + SDValue SV0, SV1; SmallVector<int, 4> Mask; // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. @@ -10808,24 +11564,90 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { continue; } + SDValue CurrentVec; if (Idx < (int)NumElts) { + // This shuffle index refers to the inner shuffle N0. Lookup the inner + // shuffle mask to identify which vector is actually referenced. Idx = OtherSV->getMaskElt(Idx); - if (IsSV1Undef && Idx >= (int) NumElts) - Idx = -1; // Propagate Undef. - } else - Idx = HasSameOp0 ? Idx - NumElts : Idx; + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) + : OtherSV->getOperand(1); + } else { + // This shuffle index references an element within N1. + CurrentVec = N1; + } + + // Simple case where 'CurrentVec' is UNDEF. + if (CurrentVec.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Canonicalize the shuffle index. We don't know yet if CurrentVec + // will be the first or second operand of the combined shuffle. + Idx = Idx % NumElts; + if (!SV0.getNode() || SV0 == CurrentVec) { + // Ok. CurrentVec is the left hand side. + // Update the mask accordingly. + SV0 = CurrentVec; + Mask.push_back(Idx); + continue; + } + + // Bail out if we cannot convert the shuffle pair into a single shuffle. + if (SV1.getNode() && SV1 != CurrentVec) + return SDValue(); - Mask.push_back(Idx); + // Ok. CurrentVec is the right hand side. + // Update the mask accordingly. + SV1 = CurrentVec; + Mask.push_back(Idx + NumElts); } + // Check if all indices in Mask are Undef. In case, propagate Undef. + bool isUndefMask = true; + for (unsigned i = 0; i != NumElts && isUndefMask; ++i) + isUndefMask &= Mask[i] < 0; + + if (isUndefMask) + return DAG.getUNDEF(VT); + + if (!SV0.getNode()) + SV0 = DAG.getUNDEF(VT); + if (!SV1.getNode()) + SV1 = DAG.getUNDEF(VT); + // Avoid introducing shuffles with illegal mask. - if (TLI.isShuffleMaskLegal(Mask, VT)) { - if (IsSV1Undef) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + if (!TLI.isShuffleMaskLegal(Mask, VT)) { + // Compute the commuted shuffle mask and test again. 
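Commuting the mask, as this fallback does before retrying legality, simply swaps the roles of the two shuffle inputs: every defined index moves to the other half of the index space. A standalone sketch (not LLVM's API):

    #include <vector>

    static void commuteShuffleMask(std::vector<int> &Mask, int NumElts) {
      for (int &Idx : Mask) {
        if (Idx < 0)
          continue; // undef lanes stay undef
        Idx = Idx < NumElts ? Idx + NumElts : Idx - NumElts;
      }
    }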
+ for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } + + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + std::swap(SV0, SV1); } + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); } return SDValue(); @@ -10858,8 +11680,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } -/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform -/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle +/// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { @@ -10881,7 +11703,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts); + Indices.push_back(NumElts+i); else return SDValue(); } @@ -10905,7 +11727,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } -/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +/// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -10991,7 +11813,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. +/// Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); @@ -11042,8 +11864,8 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorklist(SETCC.getNode()); - return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); } return SCC; @@ -11051,12 +11873,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, return SDValue(); } -/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS -/// are the two values being selected between, see if we can simplify the -/// select. Callers of this should assume that TheSelect is deleted if this -/// returns true. As such, they should return the appropriate thing (e.g. the -/// node) back to the top-level of the DAG combiner loop to avoid it being -/// looked at. +/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values +/// being selected between, see if we can simplify the select. Callers of this +/// should assume that TheSelect is deleted if this returns true. As such, they +/// should return the appropriate thing (e.g. the node) back to the top-level of +/// the DAG combiner loop to avoid it being looked at. 
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { @@ -11135,22 +11956,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, } SDValue Load; + // It is safe to replace the two loads if they have different alignments, + // but the new load must be the minimum (most restrictive) alignment of the + // inputs. + bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); + unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->isInvariant(), LLD->getAlignment()); + isInvariant, Alignment); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), LLD->getAlignment()); + LLD->isNonTemporal(), isInvariant, Alignment); } // Users of the select now use the result of the load. @@ -11166,7 +11992,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } -/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, @@ -11458,7 +12284,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. +/// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans) { @@ -11467,10 +12293,10 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } -/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// Given an ISD::SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11489,10 +12315,29 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return S; } -/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a +/// DAG expression that will generate the same value by right shifting. +SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. 
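The kind of shift sequence a target's BuildSDIVPow2 hook typically emits is easy to check in scalar code: signed division by 2^K truncates toward zero, so negative dividends need a bias of 2^K - 1 before the arithmetic right shift. A standalone sketch, assuming an arithmetic right shift of signed values (true on mainstream compilers) and K in [1, 31]:

    #include <cstdint>

    static int32_t sdivPow2(int32_t X, unsigned K) {
      int32_t Sign = X >> 31;                     // 0 or -1
      uint32_t Bias = uint32_t(Sign) >> (32 - K); // 0 or 2^K - 1
      return (X + int32_t(Bias)) >> K;            // rounds toward zero
    }
    // e.g. sdivPow2(-7, 1) == -3, matching C's -7 / 2.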
+ if (!C->getAPIntValue()) + return SDValue(); + + std::vector<SDNode *> Built; + SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +/// Given an ISD::UDIV node expressing a divide by constant, return a DAG +/// expression that will generate the same value by multiplying by a magic +/// number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11511,9 +12356,141 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself. Provides base object and offset as -// results. +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + + unsigned Iterations = 0; + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { + if (Iterations) { + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal, we need to find the zero of the function: + // F(X) = A X - 1 [which has a zero at X = 1/A] + // => + // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form + // does not require additional intermediate precision] + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FPOne = DAG.getConstantFP(1.0, VT); + + AddToWorklist(Est.getNode()); + + // Newton iterations: Est = Est + Est (1 - Arg * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + } + return Est; + } + + return SDValue(); +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + + // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that + // this entire sequence requires only one FP constant. 
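The Newton-Raphson recurrences above are easy to sanity-check in scalar code; each step of the reciprocal iteration Est = Est + Est * (1 - Arg * Est) roughly doubles the number of correct bits. A minimal standalone demo (plain float math, not DAG nodes):

    #include <cstdio>

    static float refineRecip(float Arg, float Est, unsigned Iterations) {
      for (unsigned i = 0; i < Iterations; ++i)
        Est = Est + Est * (1.0f - Arg * Est); // one Newton step
      return Est;
    }

    int main() {
      // A rough hardware-style seed for 1/3 converges in a few steps:
      std::printf("%f\n", refineRecip(3.0f, 0.3f, 3)); // ~0.333333
      return 0;
    }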
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + AddToWorklist(HalfArg.getNode()); + + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + AddToWorklist(HalfArg.getNode()); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue MinusThree = DAG.getConstantFP(-3.0, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + + // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + AddToWorklist(HalfEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + unsigned Iterations = 0; + bool UseOneConstNR = false; + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + AddToWorklist(Est.getNode()); + if (Iterations) { + Est = UseOneConstNR ? + BuildRsqrtNROneConst(Op, Est, Iterations) : + BuildRsqrtNRTwoConst(Op, Est, Iterations); + } + return Est; + } + + return SDValue(); +} + +/// Return true if base is a frame index, which is known not to alias with +/// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. @@ -11549,8 +12526,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, return isa<FrameIndexSDNode>(Base); } -/// isAlias - Return true if there is any possibility that the two addresses -/// overlap. +/// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. if (Op0->getBasePtr() == Op1->getBasePtr()) return true; @@ -11609,8 +12585,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return false; } - bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 + ? 
CombinerGlobalAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -11628,10 +12605,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { AliasAnalysis::AliasResult AAResult = AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op0->getTBAAInfo() : nullptr), + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), AliasAnalysis::Location(Op1->getMemOperand()->getValue(), Overlap2, - UseTBAA ? Op1->getTBAAInfo() : nullptr)); + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -11640,7 +12617,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return true; } -/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases) { @@ -11676,7 +12653,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } // Don't bother if we've been before. - if (!Visited.insert(Chain.getNode())) + if (!Visited.insert(Chain.getNode()).second) continue; switch (Chain.getOpcode()) { @@ -11751,10 +12728,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // like register copies will interfere with trivial cases). SmallVector<const SDNode *, 16> Worklist; - for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), - IE = Visited.end(); I != IE; ++I) - if (*I != OriginalChain.getNode()) - Worklist.push_back(*I); + for (const SDNode *N : Visited) + if (N != OriginalChain.getNode()) + Worklist.push_back(N); while (!Worklist.empty()) { const SDNode *M = Worklist.pop_back_val(); @@ -11765,7 +12741,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, for (SDNode::use_iterator UI = M->use_begin(), UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (UI.getUse().getValueType() == MVT::Other && + Visited.insert(*UI).second) { if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { // We've not visited this use, and we care about it (it could have an // ordering dependency with the original node). @@ -11781,8 +12758,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } } -/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking -/// for a better chain (aliasing node.) +/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain +/// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. @@ -11801,11 +12778,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -// SelectionDAG::Combine - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { - /// run - This is the main entry point to this class. - /// + /// This is the main entry point to this class. 
DAGCombiner(*this, AA, OptLevel).Run(Level); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ad75e916cefa..97fed230c536 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,12 +40,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -65,34 +65,32 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "isel" STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " - "target-independent selector"); + "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " - "target-specific selector"); + "target-specific selector"); STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); -/// \brief Set CallLoweringInfo attribute flags based on a call instruction -/// and called function attributes. void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, unsigned AttrIdx) { - isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); - isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); - isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); - isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); - isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); - isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); - isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); - isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); - Alignment = CS->getParamAlignment(AttrIdx); -} - -/// startNewBlock - Set the current block to which generated machine -/// instructions will be appended, and clear the local CSE map. -/// + IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + +/// Set the current block to which generated machine instructions will be +/// appended, and clear the local CSE map. void FastISel::startNewBlock() { LocalValueMap.clear(); @@ -105,18 +103,19 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } -bool FastISel::LowerArguments() { +bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. return false; - if (!FastLowerArguments()) + if (!fastLowerArguments()) return false; // Enter arguments into ValueMap for uses in non-entry BBs. 
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), - E = FuncInfo.Fn->arg_end(); I != E; ++I) { + E = FuncInfo.Fn->arg_end(); + I != E; ++I) { DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); assert(VI != LocalValueMap.end() && "Missed an argument?"); FuncInfo.ValueMap[I] = VI->second; @@ -128,22 +127,30 @@ void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); + SavedInsertPt = FuncInfo.InsertPt; } -bool FastISel::hasTrivialKill(const Value *V) const { +bool FastISel::hasTrivialKill(const Value *V) { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; // No-op casts are trivially coalesced by fast-isel. - if (const CastInst *Cast = dyn_cast<CastInst>(I)) + if (const auto *Cast = dyn_cast<CastInst>(I)) if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; + // Even the value might have only one use in the LLVM IR, it is possible that + // FastISel might fold the use into another instruction and now there is more + // than one use at the Machine Instruction level. + unsigned Reg = lookUpRegForValue(V); + if (Reg && !MRI.use_empty(Reg)) + return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (const auto *GEP = dyn_cast<GetElementPtrInst>(I)) if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; @@ -176,7 +183,7 @@ unsigned FastISel::getRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. unsigned Reg = lookUpRegForValue(V); - if (Reg != 0) + if (Reg) return Reg; // In bottom-up mode, just create the virtual register which will be used @@ -197,29 +204,24 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -/// materializeRegForValue - Helper for getRegForValue. This function is -/// called when the value isn't already available in a register and must -/// be materialized with new instructions. -unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { +unsigned FastISel::materializeConstant(const Value *V, MVT VT) { unsigned Reg = 0; - - if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) - Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); - } else if (isa<AllocaInst>(V)) { - Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); - } else if (isa<ConstantPointerNull>(V)) { + Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) + Reg = fastMaterializeAlloca(cast<AllocaInst>(V)); + else if (isa<ConstantPointerNull>(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = - getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - if (CF->isNullValue()) { - Reg = TargetMaterializeFloatZero(CF); - } else { + Reg = getRegForValue( + Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + if (CF->isNullValue()) + Reg = fastMaterializeFloatZero(CF); + else // Try to emit the constant directly. 
- Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); - } + Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { // Try to emit the constant by using an integer constant with a cast. @@ -229,22 +231,22 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); unsigned IntegerReg = - getRegForValue(ConstantInt::get(V->getContext(), IntVal)); + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) - Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, - IntegerReg, /*Kill=*/false); + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, + /*Kill=*/false); } } - } else if (const Operator *Op = dyn_cast<Operator>(V)) { - if (!SelectOperator(Op, Op->getOpcode())) + } else if (const auto *Op = dyn_cast<Operator>(V)) { + if (!selectOperator(Op, Op->getOpcode())) if (!isa<Instruction>(Op) || - !TargetSelectInstruction(cast<Instruction>(Op))) + !fastSelectInstruction(cast<Instruction>(Op))) return 0; Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { @@ -252,15 +254,26 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } + return Reg; +} + +/// Helper for getRegForValue. This function is called when the value isn't +/// already available in a register and must be materialized with new +/// instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + // Give the target-specific code a try first. + if (isa<Constant>(V)) + Reg = fastMaterializeConstant(cast<Constant>(V)); - // If target-independent code couldn't handle the value, give target-specific - // code a try. - if (!Reg && isa<Constant>(V)) - Reg = TargetMaterializeConstant(cast<Constant>(V)); + // If target-specific code couldn't or didn't want to handle the value, then + // give target-independent code a try. + if (!Reg) + Reg = materializeConstant(V, VT); // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) { + if (Reg) { LocalValueMap[V] = Reg; LastLocalValue = MRI.getVRegDef(Reg); } @@ -278,13 +291,7 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { return LocalValueMap[V]; } -/// UpdateValueMap - Update the value map to include the new mapping for this -/// instruction, or insert an extra copy to get the result in a previous -/// determined register. -/// NOTE: This is only necessary because we might select a block that uses -/// a value before we select the block that defines the value. It might be -/// possible to fix this by selecting blocks in reverse postorder. -void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { +void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return; @@ -297,7 +304,7 @@ void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. 
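// Aside: a minimal standalone sketch (plain C++, not the APFloat API; the
// helper name exactToInt64 is made up) of the exactness test used in the
// materializeConstant path above. A floating-point constant may be lowered
// as "integer constant + SINT_TO_FP" only when conversion toward zero is
// exact, i.e. when the round trip reproduces the original value.
#include <cmath>
#include <cstdint>

static bool exactToInt64(double D, int64_t &Out) {
  double T = std::trunc(D);               // APFloat::rmTowardZero analogue
  if (T != D)                             // fractional bits would be lost
    return false;
  if (T < -9223372036854775808.0 || T >= 9223372036854775808.0)
    return false;                         // outside the int64_t range
  Out = static_cast<int64_t>(T);
  return static_cast<double>(Out) == D;   // round trip must be exact
}
// exactToInt64(42.0, I) succeeds, so 42.0 can take the integer+cast path;
// exactToInt64(0.5, I) fails, so 0.5 must be materialized directly as an
// FP constant.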
for (unsigned i = 0; i < NumRegs; i++) - FuncInfo.RegFixups[AssignedReg+i] = Reg+i; + FuncInfo.RegFixups[AssignedReg + i] = Reg + i; AssignedReg = Reg; } @@ -315,13 +322,12 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, - IdxN, IdxNIsKill); + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, + IdxNIsKill); IdxNIsKill = true; - } - else if (IdxVT.bitsGT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, - IdxN, IdxNIsKill); + } else if (IdxVT.bitsGT(PtrVT)) { + IdxN = + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); IdxNIsKill = true; } return std::pair<unsigned, bool>(IdxN, IdxNIsKill); @@ -343,7 +349,7 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -358,7 +364,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() { DebugLoc OldDL = DbgLoc; recomputeInsertPt(); DbgLoc = DebugLoc(); - SavePoint SP = { OldInsertPt, OldDL }; + SavePoint SP = {OldInsertPt, OldDL}; return SP; } @@ -371,10 +377,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { DbgLoc = OldInsertPt.DL; } -/// SelectBinaryOp - Select and emit code for a binary operator instruction, -/// which has an opcode which directly corresponds to the given ISD opcode. -/// -bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { +bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -387,9 +390,8 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { if (!TLI.isTypeLegal(VT)) { // MVT::i1 is special. Allow AND, OR, or XOR because they // don't require additional zeroing, which makes them easy. - if (VT == MVT::i1 && - (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || - ISDOpcode == ISD::XOR)) + if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; @@ -397,38 +399,36 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the first operand is a constant, and handle it as "ri". At -O0, // we don't have anything that canonicalizes operand order. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) return false; - + if (!Op1) + return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, - Op1IsKill, CI->getZExtValue(), - VT.getSimpleVT()); - if (ResultReg == 0) return false; + unsigned ResultReg = + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, + CI->getZExtValue(), VT.getSimpleVT()); + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. 
- UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } - unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t Imm = CI->getZExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && - cast<BinaryOperator>(I)->isExact() && - isPowerOf2_64(Imm)) { + cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) { Imm = Log2_64(Imm); ISDOpcode = ISD::SRA; } @@ -440,54 +440,49 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); - if (ResultReg == 0) return false; + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // Check if the second operand is a constant float. - if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, CF); - if (ResultReg != 0) { + if (ResultReg) { // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } } unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. - unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, - Op0, Op0IsKill, - Op1, Op1IsKill); - if (ResultReg == 0) + unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectGetElementPtr(const User *I) { +bool FastISel::selectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!N) // Unhandled operand. Halt "fast" selection and bail. 
return false; - bool NIsKill = hasTrivialKill(I->getOperand(0)); // Keep a running tab of the total offset to coalesce multiple N = N + Offset @@ -497,18 +492,18 @@ bool FastISel::SelectGetElementPtr(const User *I) { uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, - E = I->op_end(); OI != E; ++OI) { + for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, + E = I->op_end(); + OI != E; ++OI) { const Value *Idx = *OI; - if (StructType *StTy = dyn_cast<StructType>(Ty)) { + if (auto *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -519,15 +514,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->isZero()) continue; + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; // N = N + Offset TotalOffs += - DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -535,9 +530,8 @@ bool FastISel::SelectGetElementPtr(const User *I) { continue; } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -548,43 +542,37 @@ bool FastISel::SelectGetElementPtr(const User *I) { std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. 
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, N); + updateValueMap(I, N); return true; } -/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands -/// to a stackmap or patchpoint machine instruction. bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx) { for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { Value *Val = CI->getArgOperand(i); // Check for constants and encode them with a StackMaps::ConstantOp prefix. - if (auto *C = dyn_cast<ConstantInt>(Val)) { + if (const auto *C = dyn_cast<ConstantInt>(Val)) { Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); } else if (isa<ConstantPointerNull>(Val)) { @@ -601,16 +589,15 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, return false; } else { unsigned Reg = getRegForValue(Val); - if (Reg == 0) + if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); } } - return true; } -bool FastISel::SelectStackmap(const CallInst *I) { +bool FastISel::selectStackmap(const CallInst *I) { // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, // [live variables...]) assert(I->getCalledFunction()->getReturnType()->isVoidTy() && @@ -637,7 +624,7 @@ bool FastISel::SelectStackmap(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && "Expected a constant integer."); const auto *NumBytes = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); // Push live variables for the stack map (skipping the first two arguments @@ -653,13 +640,13 @@ bool FastISel::SelectStackmap(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + .addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -670,7 +657,8 @@ bool FastISel::SelectStackmap(const CallInst *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(0).addImm(0); + .addImm(0) + .addImm(0); // Inform the Frame Information that we have a stackmap in this function. 
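// Aside: a standalone sketch of the live-operand encoding rule used by
// addStackMapLiveVars above (the SMOperand struct is hypothetical, not the
// real MachineOperand API). Constants are emitted behind a
// StackMaps::ConstantOp marker so the stackmap consumer can tell immediates
// apart from register locations.
#include <vector>

struct SMOperand {
  enum Kind { ConstantMarker, Immediate, Register } K;
  long long Val;
};

static void encodeLiveValue(std::vector<SMOperand> &Ops, bool IsConstant,
                            long long ImmOrReg) {
  if (IsConstant) {
    Ops.push_back({SMOperand::ConstantMarker, 0}); // prefix marker
    Ops.push_back({SMOperand::Immediate, ImmOrReg});
  } else {
    Ops.push_back({SMOperand::Register, ImmOrReg}); // value lives in a reg
  }
}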
FuncInfo.MF->getFrameInfo()->setHasStackMap(); @@ -709,10 +697,10 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, : CI->getType(); CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::SelectPatchpoint(const CallInst *I) { +bool FastISel::selectPatchpoint(const CallInst *I) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -728,7 +716,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && "Expected a constant integer."); const auto *NumArgsVal = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); unsigned NumArgs = NumArgsVal->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> @@ -740,6 +728,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; CallLoweringInfo CLI; + CLI.setIsPatchPoint(); if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) return false; @@ -764,12 +753,12 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && "Expected a constant integer."); const auto *NumBytes = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); // Assume that the callee is a constant address or null pointer. // FIXME: handle function symbols in the future. - unsigned CalleeAddr; + uint64_t CalleeAddr; if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { @@ -818,8 +807,8 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); // Add implicit defs (return values). for (auto Reg : CLI.InRegs) @@ -842,7 +831,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); if (CLI.NumResultRegs) - UpdateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); return true; } @@ -861,7 +850,7 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { Attrs); } -bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, +bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs) { ImmutableCallSite CS(CI); @@ -889,10 +878,10 @@ bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, CallLoweringInfo CLI; CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { +bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // Handle the incoming return values from the call. 
CLI.clearIns(); SmallVector<EVT, 4> RetTys; @@ -901,9 +890,8 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CLI.CallConv, *FuncInfo.MF, - CLI.IsVarArg, Outs, - CLI.RetTy->getContext()); + bool CanLowerReturn = TLI.CanLowerReturn( + CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); // FIXME: sret demotion isn't supported yet - bail out. if (!CanLowerReturn) @@ -932,23 +920,23 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.clearOuts(); for (auto &Arg : CLI.getArgs()) { Type *FinalType = Arg.Ty; - if (Arg.isByVal) + if (Arg.IsByVal) FinalType = cast<PointerType>(Arg.Ty)->getElementType(); bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( - FinalType, CLI.CallConv, CLI.IsVarArg); + FinalType, CLI.CallConv, CLI.IsVarArg); ISD::ArgFlagsTy Flags; - if (Arg.isZExt) + if (Arg.IsZExt) Flags.setZExt(); - if (Arg.isSExt) + if (Arg.IsSExt) Flags.setSExt(); - if (Arg.isInReg) + if (Arg.IsInReg) Flags.setInReg(); - if (Arg.isSRet) + if (Arg.IsSRet) Flags.setSRet(); - if (Arg.isByVal) + if (Arg.IsByVal) Flags.setByVal(); - if (Arg.isInAlloca) { + if (Arg.IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -957,7 +945,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { // the various CC lowering callbacks. Flags.setByVal(); } - if (Arg.isByVal || Arg.isInAlloca) { + if (Arg.IsByVal || Arg.IsInAlloca) { PointerType *Ty = cast<PointerType>(Arg.Ty); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = DL.getTypeAllocSize(ElementTy); @@ -969,7 +957,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { Flags.setByValSize(FrameSize); Flags.setByValAlign(FrameAlign); } - if (Arg.isNest) + if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); @@ -980,7 +968,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.OutFlags.push_back(Flags); } - if (!FastLowerCall(CLI)) + if (!fastLowerCall(CLI)) return false; // Set all unused physreg defs as dead. @@ -988,12 +976,12 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); if (CLI.NumResultRegs && CLI.CS) - UpdateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); return true; } -bool FastISel::LowerCall(const CallInst *CI) { +bool FastISel::lowerCall(const CallInst *CI) { ImmutableCallSite CS(CI); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -1021,19 +1009,19 @@ bool FastISel::LowerCall(const CallInst *CI) { } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within FastLowerCall. + // Target-dependent constraints are checked within fastLowerCall. bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(CS, TM)) IsTailCall = false; CallLoweringInfo CLI; CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) - .setTailCall(IsTailCall); + .setTailCall(IsTailCall); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::SelectCall(const User *I) { +bool FastISel::selectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. 
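Aside: the hunk above is largely mechanical (lowercased entry points and the renamed Is* flag members), but the loop it rewrites is the one place FastISel translates per-argument IR attributes into the flag word that calling-convention lowering consumes. A hedged standalone illustration of that pattern, in plain C++ with made-up names rather than the real ISD::ArgFlagsTy API:

#include <cstdint>

enum ArgFlagBits : uint32_t {
  FlagSExt = 1u << 0, FlagZExt = 1u << 1, FlagInReg = 1u << 2,
  FlagSRet = 1u << 3, FlagByVal = 1u << 4, FlagNest = 1u << 5,
};

struct ArgEntry { bool IsSExt, IsZExt, IsInReg, IsSRet, IsByVal, IsNest; };

static uint32_t lowerArgFlags(const ArgEntry &Arg) {
  uint32_t Flags = 0;
  if (Arg.IsSExt)  Flags |= FlagSExt;
  if (Arg.IsZExt)  Flags |= FlagZExt;
  if (Arg.IsInReg) Flags |= FlagInReg;
  if (Arg.IsSRet)  Flags |= FlagSRet;
  if (Arg.IsByVal) Flags |= FlagByVal;
  if (Arg.IsNest)  Flags |= FlagNest;
  return Flags;
}

The real loop additionally records byval size and alignment separately, since the CC callbacks need the argument's frame footprint, not just its attribute bits.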
@@ -1055,8 +1043,8 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) - .addExternalSymbol(IA->getAsmString().c_str()) - .addImm(ExtraInfo); + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); return true; } @@ -1065,7 +1053,7 @@ bool FastISel::SelectCall(const User *I) { // Handle intrinsic function calls. if (const auto *II = dyn_cast<IntrinsicInst>(Call)) - return SelectIntrinsicCall(II); + return selectIntrinsicCall(II); // Usually, it does not make sense to initialize a value, // make an unrelated function call and use the value, because @@ -1076,12 +1064,13 @@ bool FastISel::SelectCall(const User *I) { // since they tend to be inlined. flushLocalValueMap(); - return LowerCall(Call); + return lowerCall(Call); } -bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { +bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { - default: break; + default: + break; // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: @@ -1106,7 +1095,7 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { unsigned Offset = 0; Optional<MachineOperand> Op; - if (const Argument *Arg = dyn_cast<Argument>(Address)) + if (const auto *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) @@ -1137,13 +1126,14 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, - DI->getVariable()); + DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) - .addImm(0) - .addMetadata(DI->getVariable()); + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1160,26 +1150,34 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(0U).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + .addReg(0U) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addCImm(CI).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addCImm(CI) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + .addImm(CI->getZExtValue()) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addFPImm(CF).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addFPImm(CF) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, - Reg, DI->getOffset(), DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getOffset(), DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1192,34 +1190,34 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { unsigned long long Res = CI->isZero() ? -1ULL : 0; Constant *ResCI = ConstantInt::get(II->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); - if (ResultReg == 0) + if (!ResultReg) return false; - UpdateValueMap(II, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); - if (ResultReg == 0) + if (!ResultReg) return false; - UpdateValueMap(II, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::experimental_stackmap: - return SelectStackmap(II); + return selectStackmap(II); case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - return SelectPatchpoint(II); + return selectPatchpoint(II); } - return FastLowerIntrinsicCall(II); + return fastLowerIntrinsicCall(II); } -bool FastISel::SelectCast(const User *I, unsigned Opcode) { +bool FastISel::selectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - if (SrcVT == MVT::Other || !SrcVT.isSimple() || - DstVT == MVT::Other || !DstVT.isSimple()) + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || + !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. 
return false; @@ -1238,24 +1236,22 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), - DstVT.getSimpleVT(), - Opcode, - InputReg, InputRegIsKill); + unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectBitCast(const User *I) { +bool FastISel::selectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) + if (!Reg) return false; - UpdateValueMap(I, Reg); + updateValueMap(I, Reg); return true; } @@ -1270,17 +1266,15 @@ bool FastISel::SelectBitCast(const User *I) { MVT SrcVT = SrcEVT.getSimpleVT(); MVT DstVT = DstEVT.getSimpleVT(); unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT == DstVT) { - const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -1291,28 +1285,27 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) - ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectInstruction(const Instruction *I) { +bool FastISel::selectInstruction(const Instruction *I) { // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(I)) - if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) return false; DbgLoc = I->getDebugLoc(); - MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + SavedInsertPt = FuncInfo.InsertPt; - if (const CallInst *Call = dyn_cast<CallInst>(I)) { + if (const auto *Call = dyn_cast<CallInst>(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; @@ -1330,40 +1323,39 @@ FastISel::SelectInstruction(const Instruction *I) { } // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) { - ++NumFastIselSuccessIndependent; - DbgLoc = DebugLoc(); - return true; - } - // Remove dead code. However, ignore call instructions since we've flushed - // the local value map and recomputed the insert point. - if (!isa<CallInst>(I)) { + if (!SkipTargetIndependentISel) { + if (selectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; + DbgLoc = DebugLoc(); + return true; + } + // Remove dead code. 
recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + SavedInsertPt = FuncInfo.InsertPt; } - // Next, try calling the target to attempt to handle the instruction. - SavedInsertPt = FuncInfo.InsertPt; - if (TargetSelectInstruction(I)) { + if (fastSelectInstruction(I)) { ++NumFastIselSuccessTarget; DbgLoc = DebugLoc(); return true; } - // Check for dead code and remove as necessary. + // Remove dead code. recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DbgLoc = DebugLoc(); + // Undo phi node updates, because they will be added again by SelectionDAG. + if (isa<TerminatorInst>(I)) + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } -/// FastEmitBranch - Emit an unconditional branch to the given block, -/// unless it is the immediate (fall-through) successor, and update -/// the CFG. -void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { +/// Emit an unconditional branch to the given block, unless it is the immediate +/// (fall-through) successor, and update the CFG. +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -1381,54 +1373,51 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } -/// SelectFNeg - Emit an FNeg operation. -/// -bool -FastISel::SelectFNeg(const User *I) { +/// Emit an FNeg operation. +bool FastISel::selectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); - if (OpReg == 0) return false; - + if (!OpReg) + return false; bool OpRegIsKill = hasTrivialKill(I); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); - unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg, OpRegIsKill); - if (ResultReg != 0) { - UpdateValueMap(I, ResultReg); + unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + OpReg, OpRegIsKill); + if (ResultReg) { + updateValueMap(I, ResultReg); return true; } // Bitcast the value to integer, twiddle the sign bit with xor, // and then bitcast it back to floating-point. 
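// Aside: the sign-bit trick described above, in standalone form (a sketch
// assuming IEEE-754 doubles; std::bit_cast needs C++20, memcpy works on
// older standards).
#include <bit>
#include <cstdint>

static double fnegViaXor(double X) {
  uint64_t Bits = std::bit_cast<uint64_t>(X); // bitcast FP -> integer
  Bits ^= UINT64_C(1) << 63;                  // flip only the sign bit
  return std::bit_cast<double>(Bits);         // bitcast back to FP
}
// Unlike "0.0 - X", this also flips the sign of +0.0 and of NaNs, which is
// what makes it a faithful FNEG when the target has no native ISD::FNEG.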
- if (VT.getSizeInBits() > 64) return false; + if (VT.getSizeInBits() > 64) + return false; EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); if (!TLI.isTypeLegal(IntVT)) return false; - unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), ISD::BITCAST, OpReg, OpRegIsKill); - if (IntReg == 0) + if (!IntReg) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, - IntReg, /*Kill=*/true, - UINT64_C(1) << (VT.getSizeInBits()-1), - IntVT.getSimpleVT()); - if (IntResultReg == 0) + unsigned IntResultReg = fastEmit_ri_( + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); + if (!IntResultReg) return false; - ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BITCAST, IntResultReg, /*Kill=*/true); - if (ResultReg == 0) + ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, + IntResultReg, /*IsKill=*/true); + if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectExtractValue(const User *U) { +bool FastISel::selectExtractValue(const User *U) { const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); if (!EVI) return false; @@ -1464,55 +1453,54 @@ FastISel::SelectExtractValue(const User *U) { for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); - UpdateValueMap(EVI, ResultReg); + updateValueMap(EVI, ResultReg); return true; } -bool -FastISel::SelectOperator(const User *I, unsigned Opcode) { +bool FastISel::selectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: - return SelectBinaryOp(I, ISD::ADD); + return selectBinaryOp(I, ISD::ADD); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: - return SelectBinaryOp(I, ISD::SUB); + return selectBinaryOp(I, ISD::SUB); case Instruction::FSub: // FNeg is currently represented in LLVM IR as a special case of FSub. 
if (BinaryOperator::isFNeg(I)) - return SelectFNeg(I); - return SelectBinaryOp(I, ISD::FSUB); + return selectFNeg(I); + return selectBinaryOp(I, ISD::FSUB); case Instruction::Mul: - return SelectBinaryOp(I, ISD::MUL); + return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return selectBinaryOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectBinaryOp(I, ISD::SDIV); + return selectBinaryOp(I, ISD::SDIV); case Instruction::UDiv: - return SelectBinaryOp(I, ISD::UDIV); + return selectBinaryOp(I, ISD::UDIV); case Instruction::FDiv: - return SelectBinaryOp(I, ISD::FDIV); + return selectBinaryOp(I, ISD::FDIV); case Instruction::SRem: - return SelectBinaryOp(I, ISD::SREM); + return selectBinaryOp(I, ISD::SREM); case Instruction::URem: - return SelectBinaryOp(I, ISD::UREM); + return selectBinaryOp(I, ISD::UREM); case Instruction::FRem: - return SelectBinaryOp(I, ISD::FREM); + return selectBinaryOp(I, ISD::FREM); case Instruction::Shl: - return SelectBinaryOp(I, ISD::SHL); + return selectBinaryOp(I, ISD::SHL); case Instruction::LShr: - return SelectBinaryOp(I, ISD::SRL); + return selectBinaryOp(I, ISD::SRL); case Instruction::AShr: - return SelectBinaryOp(I, ISD::SRA); + return selectBinaryOp(I, ISD::SRA); case Instruction::And: - return SelectBinaryOp(I, ISD::AND); + return selectBinaryOp(I, ISD::AND); case Instruction::Or: - return SelectBinaryOp(I, ISD::OR); + return selectBinaryOp(I, ISD::OR); case Instruction::Xor: - return SelectBinaryOp(I, ISD::XOR); + return selectBinaryOp(I, ISD::XOR); case Instruction::GetElementPtr: - return SelectGetElementPtr(I); + return selectGetElementPtr(I); case Instruction::Br: { const BranchInst *BI = cast<BranchInst>(I); @@ -1520,7 +1508,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; - FastEmitBranch(MSucc, BI->getDebugLoc()); + fastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -1531,7 +1519,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Unreachable: if (TM.Options.TrapUnreachable) - return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; else return true; @@ -1544,38 +1532,39 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { return false; case Instruction::Call: - return SelectCall(I); + return selectCall(I); case Instruction::BitCast: - return SelectBitCast(I); + return selectBitCast(I); case Instruction::FPToSI: - return SelectCast(I, ISD::FP_TO_SINT); + return selectCast(I, ISD::FP_TO_SINT); case Instruction::ZExt: - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); case Instruction::SExt: - return SelectCast(I, ISD::SIGN_EXTEND); + return selectCast(I, ISD::SIGN_EXTEND); case Instruction::Trunc: - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); case Instruction::SIToFP: - return SelectCast(I, ISD::SINT_TO_FP); + return selectCast(I, ISD::SINT_TO_FP); case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) return false; - UpdateValueMap(I, Reg); + if (!Reg) + return false; + updateValueMap(I, Reg); return true; } case Instruction::ExtractValue: - return SelectExtractValue(I); + return selectExtractValue(I); case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -1586,91 +1575,72 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FuncInfo(funcInfo), - MF(funcInfo.MF), - MRI(FuncInfo.MF->getRegInfo()), - MFI(*FuncInfo.MF->getFrameInfo()), - MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), - DL(*TM.getDataLayout()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()), - LibInfo(libInfo) { -} +FastISel::FastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo, + bool SkipTargetIndependentISel) + : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TII(*MF->getSubtarget().getInstrInfo()), + TLI(*MF->getSubtarget().getTargetLowering()), + TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), + SkipTargetIndependentISel(SkipTargetIndependentISel) {} FastISel::~FastISel() {} -bool FastISel::FastLowerArguments() { - return false; -} +bool FastISel::fastLowerArguments() { return false; } -bool FastISel::FastLowerCall(CallLoweringInfo &/*CLI*/) { - return false; -} +bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; } -bool FastISel::FastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { +bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { return false; } -unsigned FastISel::FastEmit_(MVT, MVT, - unsigned) { - return 0; -} +unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; } -unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/) { +unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - unsigned /*Op1*/, bool /*Op1IsKill*/) { +unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, const ConstantFP * /*FPImm*/) { +unsigned FastISel::fastEmit_f(MVT, MVT, unsigned, + const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, +unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned 
/*Op0*/, + bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - unsigned /*Op1*/, bool /*Op1IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } -/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with an immediate operand using FastEmit_ri. +/// This method is a wrapper of fastEmit_ri. It first tries to emit an +/// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and tries -/// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - uint64_t Imm, MVT ImmType) { +/// fastEmit_rr instead. +unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, + bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { Opcode = ISD::SHL; @@ -1688,30 +1658,29 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, return 0; // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); - if (ResultReg != 0) + unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + if (ResultReg) return ResultReg; - unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) { + unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow.
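// Aside: a standalone sketch of the strength reduction performed at the top
// of fastEmit_ri_ above (plain C++; isPow2 and log2Exact are stand-ins for
// the real isPowerOf2_64/Log2_64 helpers): a multiply by a power of two is
// rewritten as a left shift by its log2.
#include <cstdint>

static bool isPow2(uint64_t X) { return X && (X & (X - 1)) == 0; }

static unsigned log2Exact(uint64_t X) { // X must be a power of two
  unsigned L = 0;
  while (X >>= 1)
    ++L;
  return L;
}

static uint64_t mulPow2(uint64_t V, uint64_t Imm) {
  return isPow2(Imm) ? V << log2Exact(Imm) : V * Imm; // V * 8 == V << 3
}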
- IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), - VT.getSizeInBits()); + IntegerType *ITy = + IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); - assert (MaterialReg != 0 && "Unable to materialize imm."); - if (MaterialReg == 0) return 0; + if (!MaterialReg) + return 0; } - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, + /*IsKill=*/true); } -unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { +unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { return MRI.createVirtualRegister(RC); } -unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum) { if (TargetRegisterInfo::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); @@ -1727,8 +1696,8 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, return Op; } -unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass* RC) { +unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); @@ -1736,9 +1705,9 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill) { +unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1746,10 +1715,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -1757,10 +1726,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill) { +unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1769,23 +1738,23 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill) { +unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, unsigned Op2, + bool Op2IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1795,48 +1764,46 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); - MRI.constrainRegClass(Op0, RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1844,24 +1811,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + 
.addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1869,23 +1835,22 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1894,25 +1859,25 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, +unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm1, uint64_t Imm2) { + unsigned Op0, bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1921,28 +1886,30 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, 
getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1951,41 +1918,41 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm1, + uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1).addImm(Imm2); + .addImm(Imm1) + .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx) { +unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, + bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } -/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op -/// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { - return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +/// Emit MachineInstrs to compute the value of Op with all but the least +/// significant bit set to zero. +unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. @@ -1994,22 +1961,24 @@ unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { /// nodes as input. We cannot just directly add them, because expansion /// might result in multiple MBB's for one BB. As such, the start of the /// BB might correspond to a different MBB than the end. 
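// Aside: a standalone sketch (hypothetical types, not the real
// FunctionLoweringInfo API) of the record-and-roll-back pattern the function
// below uses: PHI updates are appended speculatively and truncated back to
// the original count whenever selection of the terminator bails out, so
// SelectionDAG can redo them from scratch.
#include <cstddef>
#include <utility>
#include <vector>

struct PHIUpdateLog {
  std::vector<std::pair<int, unsigned>> Updates; // (machine PHI, vreg)
  std::size_t Mark = 0;                          // OrigNumPHINodesToUpdate

  void begin() { Mark = Updates.size(); }        // before handling a block
  void record(int PHI, unsigned Reg) { Updates.emplace_back(PHI, Reg); }
  void rollback() { Updates.resize(Mark); }      // undo on selection failure
};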
-bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { +bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa<PHINode>(SuccBB->begin())) continue; + if (!isa<PHINode>(SuccBB->begin())) + continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. - if (!SuccsHandled.insert(SuccMBB)) continue; + if (!SuccsHandled.insert(SuccMBB).second) + continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); @@ -2017,10 +1986,11 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + const auto *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. - if (PN->use_empty()) continue; + if (PN->use_empty()) + continue; // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, @@ -2031,10 +2001,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. - if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); - else { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } } @@ -2044,12 +2012,12 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. DbgLoc = PN->getDebugLoc(); - if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) + if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); - if (Reg == 0) { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!Reg) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); @@ -2062,17 +2030,17 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { assert(LI->hasOneUse() && - "tryToFoldLoad expected a LoadInst with a single use"); + "tryToFoldLoad expected a LoadInst with a single use"); // We know that the load has a single use, but don't know what it is. If it // isn't one of the folded instructions, then we can't succeed here. Handle // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. 
+ unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. const Instruction *TheUser = LI->user_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. + while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. + --MaxUsers) { // Don't scan too far. // If there are multiple or no uses of this instruction, then bail out. if (!TheUser->hasOneUse()) return false; @@ -2094,7 +2062,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // then there actually was no reference to it. Perhaps the load is referenced // by a dead instruction. unsigned LoadReg = getRegForValue(LI); - if (LoadReg == 0) + if (!LoadReg) return false; // We can't fold if this vreg has no uses or more than one use. Multiple uses @@ -2152,19 +2120,20 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags = MachineMemOperand::MOStore; Ptr = SI->getPointerOperand(); ValTy = SI->getValueOperand()->getType(); - } else { + } else return nullptr; - } - bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; - bool IsInvariant = I->getMetadata("invariant.load") != nullptr; - const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); - if (Alignment == 0) // Ensure that codegen never sees alignment 0. + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. Alignment = DL.getABITypeAlignment(ValTy); - unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + unsigned Size = DL.getTypeStoreSize(ValTy); if (IsVolatile) Flags |= MachineMemOperand::MOVolatile; @@ -2174,5 +2143,45 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOInvariant; return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, - Alignment, TBAAInfo, Ranges); + Alignment, AAInfo, Ranges); +} + +CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { + // If both operands are the same, then try to optimize or fold the cmp. 
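// Illustrative aside (editor's sketch, not part of this patch): with both
// operands identical, the compare either folds to a constant or reduces to a
// NaN test, for example:
//   fcmp oeq %x, %x  ->  fcmp ord %x, %x   (true iff %x is not NaN)
//   fcmp une %x, %x  ->  fcmp uno %x, %x   (true iff %x is NaN)
//   icmp sgt %x, %x  ->  always false      (FCMP_FALSE below)
//   icmp sle %x, %x  ->  always true       (FCMP_TRUE below)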
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index ae124e852a91..19aca6edd451 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -36,6 +36,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
@@ -55,58 +56,71 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
return false;
}
+static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+ // For the users of the source value fed to a compare instruction: if the
+ // number of signed predicates is greater than the number of unsigned
+ // predicates, we prefer to use SIGN_EXTEND.
+ //
+ // With this optimization, we can remove some redundant sign or zero
+ // extension instructions, and eventually expose more machine CSE
+ // opportunities.
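// Illustrative aside (editor's sketch, not part of this patch): given
//   %c1 = icmp sgt i32 %x, %a
//   %c2 = icmp slt i32 %x, %b
// the value %x sees NumOfSigned == 2 and NumOfUnsigned == 0, so a later
// promotion of %x picks SIGN_EXTEND, and both compares can share a single
// sign-extended copy instead of each inserting its own extension.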
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + unsigned NumOfSigned = 0, NumOfUnsigned = 0; + for (const User *U : V->users()) { + if (const auto *CI = dyn_cast<CmpInst>(U)) { + NumOfSigned += CI->isSigned(); + NumOfUnsigned += CI->isUnsigned(); + } + } + if (NumOfSigned > NumOfUnsigned) + ExtendKind = ISD::SIGN_EXTEND; + + return ExtendKind; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { - const TargetLowering *TLI = TM.getTargetLowering(); - Fn = &fn; MF = &mf; + TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), - Outs, Fn->getContext()); + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - // Don't fold inalloca allocas or other dynamic allocas into the initial - // stack frame allocation, even if they are in the entry block. - if (!AI->isStaticAlloca()) - continue; - - if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { - Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - AI->getAlignment()); - - TySize *= CUI->getZExtValue(); // Get total allocated size. - if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } - } - for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for dynamic allocas. if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - if (!AI->isStaticAlloca()) { + // Static allocas can be folded into the initial stack frame adjustment. + if (AI->isStaticAlloca()) { + const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); + Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + + } else { unsigned Align = std::max( (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( AI->getAllocatedType()), AI->getAlignment()); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + MF->getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -126,9 +140,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). 
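// Illustrative aside (editor's sketch, not part of this patch; the x86-style
// constraint string below is only an example): a clobber like
//   call void asm sideeffect "", "~{esp}"()
// resolves through getRegForInlineAsmConstraint to the stack pointer
// register, and the setHasInlineAsmWithSPAdjust(true) call below warns frame
// lowering that inline asm may adjust SP behind its back.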
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
- std::pair<unsigned, const TargetRegisterClass*> PhysReg =
- TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
- Op.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> PhysReg =
+ TLI->getRegForInlineAsmConstraint(Op.ConstraintCode,
+ Op.ConstraintVT);
if (PhysReg.first == SP)
MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true);
}
@@ -136,6 +150,21 @@
}
}
+ // Look for calls to the @llvm.va_start intrinsic. We can omit some
+ // prologue boilerplate for variadic functions that don't examine their
+ // arguments.
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::vastart)
+ MF->getFrameInfo()->setHasVAStart(true);
+ }
+
+ // If we have a musttail call in a variadic function, we need to ensure we
+ // forward implicit register parameters.
+ if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (CI->isMustTailCall() && Fn->isVarArg())
+ MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ }
+
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -166,13 +195,16 @@ StaticAllocaMap.find(AI);
if (SI != StaticAllocaMap.end()) { // Check for VLAs.
int FI = SI->second;
- MMI.setVariableDbgInfo(DI->getVariable(),
+ MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
FI, DI->getDebugLoc());
}
}
}
}
}
+
+ // Decide the preferred extend type for a value.
+ PreferredExtendType[I] = getPreferredExtendForValue(I);
}
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
@@ -208,7 +240,7 @@ for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
PHIReg += NumRegisters;
@@ -241,12 +273,14 @@ void FunctionLoweringInfo::clear() {
ArgDbgValues.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
+ StatepointStackSlots.clear();
+ PreferredExtendType.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
- return RegInfo->
- createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT));
+ return RegInfo->createVirtualRegister(
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -257,7 +291,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
/// will assign registers for each member or element.
/// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); @@ -306,8 +340,6 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; - const TargetLowering *TLI = TM.getTargetLowering(); - SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && @@ -452,7 +484,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. - std::vector<const GlobalVariable *> TyInfo; + std::vector<const GlobalValue *> TyInfo; unsigned N = I.getNumArgOperands(); for (unsigned i = N - 1; i > 1; --i) { @@ -510,14 +542,14 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, Value *Val = I.getClause(i - 1); if (I.isCatch(i - 1)) { MMI.addCatchTypeInfo(MBB, - dyn_cast<GlobalVariable>(Val->stripPointerCasts())); + dyn_cast<GlobalValue>(Val->stripPointerCasts())); } else { // Add filters in a list. Constant *CVal = cast<Constant>(Val); - SmallVector<const GlobalVariable*, 4> FilterList; + SmallVector<const GlobalValue*, 4> FilterList; for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) - FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts())); + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); MMI.addFilterTypeInfo(MBB, FilterList); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7c124b8caa91..a65f33e17774 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -265,12 +265,16 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MIB.addReg(VRBase, RegState::Define); } - SDValue Op(Node, i); - if (IsClone) - VRBaseMap.erase(Op); - bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - (void)isNew; // Silence compiler warning. - assert(isNew && "Node emitted out of order - early"); + // If this def corresponds to a result of the SDNode insert the VRBase into + // the lookup map. + if (i < NumResults) { + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } } } @@ -402,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! 
- Align = TM->getDataLayout()->getTypeAllocSize(Type); + Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -643,14 +647,18 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { uint64_t Offset = SD->getOffset(); - MDNode* MDPtr = SD->getMDPtr(); + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(SD->getFrameIx()) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -696,7 +704,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U, RegState::Debug); } - MIB.addMetadata(MDPtr); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); return &*MIB; } @@ -859,9 +868,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. -#ifdef NDEBUG if (II.hasPostISelHook()) -#endif TLI->AdjustInstrPostInstrSelection(MIB, Node); } @@ -1013,11 +1020,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// at the given position in the given block. InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos) - : MF(mbb->getParent()), - MRI(&MF->getRegInfo()), - TM(&MF->getTarget()), - TII(TM->getInstrInfo()), - TRI(TM->getRegisterInfo()), - TLI(TM->getTargetLowering()), - MBB(mbb), InsertPos(insertpos) { -} + : MF(mbb->getParent()), MRI(&MF->getRegInfo()), + TII(MF->getSubtarget().getInstrInfo()), + TRI(MF->getSubtarget().getRegisterInfo()), + TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), + InsertPos(insertpos) {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 920dda8820f6..7b86f7dd8de0 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef INSTREMITTER_H -#define INSTREMITTER_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -29,7 +29,6 @@ class SDDbgValue; class InstrEmitter { MachineFunction *MF; MachineRegisterInfo *MRI; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 16c5b4ba7768..e5473e35caed 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -33,10 +34,13 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace 
llvm; +#define DEBUG_TYPE "legalizedag" + //===----------------------------------------------------------------------===// -/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves /// eliminating value sizes the machine cannot handle (promoting small sizes to /// large sizes or splitting up large values into small values) as well as @@ -48,16 +52,17 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { +class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + /// \brief The set of nodes which have already been legalized. We hold a + /// reference to it in order to update as necessary on node deletion. + SmallPtrSetImpl<SDNode *> &LegalizedNodes; - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet<SDNode *, 16> LegalizedNodes; + /// \brief A set of all the nodes updated during legalization. + SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); @@ -66,20 +71,22 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { // Libcall insertion helpers. public: - explicit SelectionDAGLegalize(SelectionDAG &DAG); - - void LegalizeDAG(); + SelectionDAGLegalize(SelectionDAG &DAG, + SmallPtrSetImpl<SDNode *> &LegalizedNodes, + SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr) + : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), + LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} -private: - /// LegalizeOp - Legalizes the given operation. + /// \brief Legalizes the given operation. void LegalizeOp(SDNode *Node); +private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); - /// PerformInsertVectorEltInMemory - Some target cannot handle a variable + /// Some targets cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. @@ -88,7 +95,7 @@ private: SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl); - /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which + /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -145,43 +152,55 @@ private: void ExpandNode(SDNode *Node); void PromoteNode(SDNode *Node); - void ForgetNode(SDNode *N) { - LegalizedNodes.erase(N); - if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) - ++LegalizePosition; - } - public: - // DAGUpdateListener implementation. 
- void NodeDeleted(SDNode *N, SDNode *E) override { - ForgetNode(N); - } - void NodeUpdated(SDNode *N) override {} - // Node replacement helpers void ReplacedNode(SDNode *N) { - if (N->use_empty()) { - DAG.RemoveDeadNode(N); - } else { - ForgetNode(N); - } + LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + assert(Old->getNumValues() == New->getNumValues() && + "Replacing one node with another that produces a different number " + "of values!"); DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); + if (UpdatedNodes) + UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + DAG.TransferDbgValues(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + DEBUG(dbgs() << (i == 0 ? " with: " + : " and: "); + New[i]->dump(&DAG)); + DAG.TransferDbgValues(SDValue(Old, i), New[i]); + if (UpdatedNodes) + UpdatedNodes->insert(New[i].getNode()); + } ReplacedNode(Old); } }; } -/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which +/// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -213,41 +232,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : SelectionDAG::DAGUpdateListener(dag), - TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag) { -} - -void SelectionDAGLegalize::LegalizeDAG() { - DAG.AssignTopologicalOrder(); - - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. - for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } - } - if (!AnyLegalized) - break; - - } - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - -/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { @@ -275,7 +260,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. 
- TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); @@ -291,7 +276,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); + VT, false, false, false, Alignment); return Result; } SDValue Result = @@ -301,7 +286,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { return Result; } -/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +/// Expands an unaligned store to 2 half-size stores. static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, SelectionDAGLegalize *DAGLegalize) { @@ -377,7 +362,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, MachinePointerInfo(), - MemVT, false, false, 0); + MemVT, false, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo() @@ -385,7 +370,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset), - ST->getTBAAInfo())); + ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); @@ -417,14 +402,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment, ST->getTBAAInfo()); + Alignment, ST->getAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } -/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +/// Expands an unaligned load to 2 half-size loads. static void ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI, @@ -476,7 +461,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -494,8 +479,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -508,7 +494,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. 
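// Illustrative note (editor's aside, not part of this patch): this is the
// floating-point/vector arm of the expansion. The loop above has already
// copied the misaligned source into an aligned stack temporary using
// register-sized integer loads and stores, so the load of the original type
// below can be issued against that temporary at natural alignment.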
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT, false, false, 0); + MachinePointerInfo(), LoadedVT, false,false, false, + 0); // Callers expect a MERGE_VALUES node. ValResult = Load; @@ -538,25 +525,27 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(),LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } // aggregate the two parts @@ -572,8 +561,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ChainResult = TF; } -/// PerformInsertVectorEltInMemory - Some target cannot handle a variable -/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// Some target cannot handle a variable insertion index for the +/// INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue SelectionDAGLegalize:: @@ -659,7 +648,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -668,7 +657,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -677,7 +666,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -690,13 +679,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), - TBAAInfo); + AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -714,7 +703,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -731,12 +720,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // If this is an unaligned store and the target doesn't support it, // expand it. unsigned AS = ST->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + unsigned Align = ST->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), - DAG, TLI, this); + if (Align < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; } @@ -754,7 +743,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -776,8 +765,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + NVT, isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -799,7 +787,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -811,7 +799,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } else { // Big endian - avoid unaligned stores. 
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -821,7 +809,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -830,7 +818,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } // The order of the stores doesn't matter. @@ -842,12 +830,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; @@ -868,7 +857,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -893,13 +882,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); } } @@ -928,6 +918,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } ReplacedNode(Node); } return; @@ -938,7 +932,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -949,7 +944,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // nice to have an effective generic way of getting these benefits... // Until such a way is found, don't insist on promoting i1 here. (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == + TargetLowering::Promote)) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. unsigned NewWidth = SrcVT.getStoreSizeInBits(); @@ -965,7 +961,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + NVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); Ch = Result.getValue(1); // The chain. @@ -1002,7 +999,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1010,8 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1031,7 +1028,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; @@ -1040,8 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1060,7 +1057,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Ch; } else { bool isCustom = false; - switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { + switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), + SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; @@ -1080,37 +1078,37 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // it, expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); + unsigned Align = LD->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); + if (Align < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } } } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && - TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = (SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? 
ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; } assert(!SrcVT.isVector() && @@ -1134,8 +1132,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, - SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; @@ -1148,13 +1145,18 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); } } -/// LegalizeOp - Return a legal replacement for the given operation, with -/// all legal operands. +/// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return; @@ -1335,10 +1337,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - ReplacedNode(Node); + ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { @@ -1349,19 +1348,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // a complete mess. SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) { - SmallVector<SDValue, 8> ResultVals; - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { - if (e == 1) - ResultVals.push_back(Res); - else - ResultVals.push_back(Res.getValue(i)); - } - if (Res.getNode() != Node || Res.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data()); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - ReplacedNode(Node); + if (!(Res.getNode() != Node || Res.getResNo() != 0)) + return; + + if (Node->getNumValues() == 1) { + // We can just directly replace this node with the lowered value. 
+ ReplaceNode(SDValue(Node, 0), Res); + return; } + + SmallVector<SDValue, 8> ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Res.getValue(i)); + ReplaceNode(Node, ResultVals.data()); return; } } @@ -1449,7 +1448,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1484,7 +1483,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { StackPtr); // Store the subvector. - Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo(), false, false, 0); // Finally, load the updated vector. @@ -1624,7 +1623,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, @@ -1639,8 +1639,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Results.push_back(Tmp2); } -/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. +/// Legalize a SETCC with given LHS and RHS and condition code CC on the current +/// target. /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert @@ -1754,7 +1754,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, return false; } -/// EmitStackConvert - Emit a store/load combination to the stack. This stores +/// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. @@ -1798,7 +1798,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, - PtrInfo, SlotVT, false, false, DestAlign); + PtrInfo, SlotVT, false, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1878,7 +1878,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, ShuffleVec.data()); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; - NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + NewIntermedVals.push_back( + std::make_pair(Shuffle, std::move(FinalIndices))); } // If we had an odd number of defined values, then append the last @@ -1913,7 +1914,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, return true; } -/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't +/// Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. 
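// Illustrative aside (editor's sketch, not part of this patch): the fallbacks
// in this routine typically include turning an all-constant BUILD_VECTOR into
// a constant-pool load, turning a splat of one value into SCALAR_TO_VECTOR
// plus a shuffle, and otherwise building the vector through the stack
// (ExpandVectorBuildThroughStack), storing each element and reloading the
// whole vector.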
SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); @@ -2025,7 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } -// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result // and leave the Hi part unset. @@ -2073,7 +2074,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// Generate a libcall taking the given operands as arguments /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, @@ -2104,7 +2105,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } -// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, @@ -2174,7 +2175,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +/// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; @@ -2190,8 +2191,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, return TLI.getLibcallName(LC) != nullptr; } -/// useDivRem - Only issue divrem libcall if both quotient and remainder are -/// needed. +/// Only issue divrem libcall if both quotient and remainder are needed. static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { // The other use might have been replaced with a divrem already. unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; @@ -2216,8 +2216,7 @@ static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { return false; } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. +/// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2279,7 +2278,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } -/// isSinCosLibcallAvailable - Return true if sincos libcall is available. +/// Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { @@ -2293,8 +2292,8 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// canCombineSinCosLibcall - Return true if sincos libcall is available and -/// can be used to combine sin and cos. +/// Return true if sincos libcall is available and can be used to combine sin +/// and cos. 
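// Editor's sketch (illustrative, not part of this patch): the source-level
// effect of this combine, assuming a GNU-style libm that provides sincosf.
// When both results are live, two libcalls collapse into one:
extern "C" void sincosf(float, float *, float *);
static void polarToXY(float R, float Theta, float &X, float &Y) {
  float S, C;
  sincosf(Theta, &S, &C); // replaces separate sinf(Theta) / cosf(Theta) calls
  X = R * C;
  Y = R * S;
}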
static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, const TargetMachine &TM) { if (!isSinCosLibcallAvailable(Node, TLI)) @@ -2307,8 +2306,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, return true; } -/// useSinCos - Only issue sincos libcall if both sin and cos are -/// needed. +/// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; @@ -2326,8 +2324,7 @@ static bool useSinCos(SDNode *Node) { return false; } -/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos -/// pairs. +/// Issue libcalls to sincos to compute sin / cos pairs. void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2392,7 +2389,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, MachinePointerInfo(), false, false, false, 0)); } -/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. @@ -2581,7 +2578,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment); + MVT::f32, false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2590,7 +2587,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } -/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// *INT_TO_FP operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP @@ -2632,7 +2629,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, dl, NewInTy, LegalOp)); } -/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// FP_TO_*INT operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT @@ -2676,8 +2673,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. -/// +/// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT); @@ -2723,8 +2719,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { } } -/// ExpandBitCount - Expand the specified bitcount instruction into operations. -/// +/// Expand the specified bitcount instruction into operations. 
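// Editor's sketch (illustrative, not part of this patch): the scalar recipes
// mirrored by the expansions below, written for a 32-bit value. CTPOP is
// usually the classic SWAR reduction; CTLZ smears the leading set bit
// rightward and then popcounts the complement, matching the in-line comments
// further down.
static unsigned popcount32(unsigned V) { // assumes 32-bit unsigned
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit partial sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit partial sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
  return (V * 0x01010101u) >> 24;                   // total lands in top byte
}
static unsigned ctlz32(unsigned X) {
  X |= X >> 1; X |= X >> 2; X |= X >> 4;            // smear highest set bit
  X |= X >> 8; X |= X >> 16;
  return popcount32(~X);                            // count the zero prefix
}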
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl) { switch (Opc) { @@ -2783,7 +2778,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // x = x | (x >>32); // for 64-bit input // return popcount(~x); // - // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); @@ -2802,7 +2797,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } - // see also http://www.hackersdelight.org/HDcode/ntz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), @@ -3396,6 +3391,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, @@ -3514,6 +3519,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; case ISD::FP16_TO_FP: { if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -3546,12 +3561,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSUB: { EVT VT = Node->getValueType(0); - assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && - TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && - "Don't know how to expand this FP subtraction!"); - Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); - Results.push_back(Tmp1); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + } break; } case ISD::SUB: { @@ -3806,9 +3825,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, delete the - // node. The above EXTRACT_ELEMENT nodes should have been folded. - DAG.DeleteNode(Ret.getNode()); + // generally permitted during this phase of legalization, make sure the + // node has no more uses. The above EXTRACT_ELEMENT nodes should have been + // folded. 
+ assert(Ret->use_empty() && + "Unexpected uses of illegally typed value from expanded lib call."); } if (isSigned) { @@ -3869,7 +3890,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(), MemVT, - false, false, 0); + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4179,6 +4200,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); break; } @@ -4255,6 +4280,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: case ISD::FDIV: case ISD::FREM: case ISD::FPOW: { @@ -4282,10 +4310,57 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplaceNode(Node, Results.data()); } -// SelectionDAG::Legalize - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Legalize() { - /// run - This is the main entry point to this class. - /// - SelectionDAGLegalize(*this).LegalizeDAG(); + AssignTopologicalOrder(); + + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (auto NI = allnodes_end(); NI != allnodes_begin();) { + --NI; + + SDNode *N = NI; + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + continue; + } + + if (LegalizedNodes.insert(N).second) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + } + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector<SDNode *, 16> &UpdatedNodes) { + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands.
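The rewritten Legalize() above replaces the old single-shot LegalizeDAG() call with a fixpoint sweep: legalizing one node can create new, not-yet-legal nodes, so the walk repeats until a full pass changes nothing. A minimal sketch of that control structure, with a hypothetical Node type and a legalizeOne() stand-in rather than the SelectionDAG API:

    #include <set>
    #include <vector>

    struct Node {}; // stand-in for SDNode

    // Placeholder for Legalizer.LegalizeOp(N); the real call may create new
    // nodes, which is why the caller must re-sweep until nothing changes.
    void legalizeOne(Node *) {}

    void legalizeToFixpoint(std::vector<Node *> &Nodes) {
      std::set<Node *> Legalized;
      for (;;) {
        bool AnyLegalized = false;
        for (Node *N : Nodes) {
          if (Legalized.insert(N).second) { // first visit only
            AnyLegalized = true;
            legalizeOne(N);
          }
        }
        if (!AnyLegalized)
          break; // a whole pass made no progress: done
      }
    }

The single-node LegalizeOp() entry point, whose body continues below, uses the same machinery seeded with just the node of interest.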
+ LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 649dd7a349ff..4591e79316d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -68,6 +68,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; @@ -153,6 +155,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -377,10 +405,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's // entirely possible for both f16 and f32 to be legal, so use the fully // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) + SoftenFloatResult(Op.getNode(), 0); + } RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) + Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } @@ -543,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -556,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -851,6 +884,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; @@ -914,6 +949,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, ISD::SETEQ); } +void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 44d9e3875b83..82b114b80aa9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -66,6 +66,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; @@ -342,9 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. - return DAG.getNode(ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getSizeInBits() - - OVT.getSizeInBits(), NVT)); + return DAG.getNode( + ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), + NVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { @@ -362,8 +364,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. 
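Concretely, for a 16-bit CTTZ promoted to 32 bits, OR-ing in bit 16 (the bit just off the top of the original type) guarantees the wide value is never zero and caps the count at the original width. A scalar model using the GCC/Clang builtin, not the DAG nodes the hunk below builds:

    #include <cstdint>

    // CTTZ of an i16 value computed in 32 bits: an all-zero input now
    // yields 16, the correct 16-bit CTTZ result, instead of 32.
    unsigned cttz16(uint16_t x) {
      uint32_t Wide = static_cast<uint32_t>(x) | (1u << 16);
      return static_cast<unsigned>(__builtin_ctz(Wide));
    }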
- APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); + auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), + OVT.getScalarSizeInBits()); Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); } return DAG.getNode(N->getOpcode(), dl, NVT, Op); @@ -453,6 +455,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); + SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT); + SDLoc dl(N); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOLoad, NVT.getStoreSize(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); + + SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), + ExtMask, ExtSrc0, MMO); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -824,6 +844,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -861,7 +885,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: - case ISD::SETNE: + case ISD::SETNE: { + SDValue OpL = GetPromotedInteger(NewLHS); + SDValue OpR = GetPromotedInteger(NewRHS); + + // We would prefer to promote the comparison operands with sign extension. + // If we find that an operand is actually the result of truncating an + // AssertSext, we can avoid inserting a real truncate instruction, which + // would eventually be redundant. + if (OpL->getOpcode() == ISD::AssertSext && + cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && + OpR->getOpcode() == ISD::AssertSext && + cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { + NewLHS = OpL; + NewRHS = OpR; + } else { + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + } + break; + } case ISD::SETUGE: case ISD::SETUGT: case ISD::SETULE: @@ -945,7 +988,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && - "Legal vector of one illegal element?"); + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type.
Check that any extra bits introduced will be @@ -1071,6 +1114,63 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ + + assert(OpNo == 2 && "Only know how to promote the mask!"); + SDValue DataOp = N->getData(); + EVT DataVT = DataOp.getValueType(); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDLoc dl(N); + + if (!TLI.isTypeLegal(DataVT)) { + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + } + else { + assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && + "Unexpected data legalization in MSTORE"); + DataOp = GetWidenedVector(DataOp); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(DataOp.getValueType()); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + } + } + else + Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[2] = Mask; + NewOps[3] = DataOp; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1859,7 +1959,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1868,7 +1968,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1891,7 +1992,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Little-endian - low bits are at low addresses. 
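As a scalar picture of the split performed by the Lo/Hi loads that follow: on a little-endian target the low half comes from the original address and the high half from IncrementSize bytes further on. A host-side sketch, which assumes a little-endian host and is not the DAG code itself:

    #include <cstdint>
    #include <cstring>

    // Reassemble a 64-bit value from two 32-bit halves, the way
    // ExpandIntRes_LOAD splits one wide load into two narrow ones.
    uint64_t load64AsTwoHalves(const unsigned char *Ptr) {
      uint32_t Lo, Hi;
      std::memcpy(&Lo, Ptr, sizeof(Lo));              // low bits, low address
      std::memcpy(&Hi, Ptr + sizeof(Lo), sizeof(Hi)); // Ptr + IncrementSize
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }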
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1903,8 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1922,7 +2023,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1931,8 +2033,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2709,7 +2811,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2719,7 +2821,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); } if (TLI.isLittleEndian()) { @@ -2727,7 +2829,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2740,7 +2842,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2768,7 +2870,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2778,7 +2880,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2853,7 +2955,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { FudgePtr, MachinePointerInfo::getConstantPool(), MVT::f32, - false, false, Alignment); + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index bd7dacf2bc69..ebf6b28259ea 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; + // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to + // provide the same kind of custom splitting behavior. + if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { + // We've legalized a return type by splitting it. If there is a chain, + // replace that too. + SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); + if (N->getNumValues() > 1) + ReplaceValueWith(SDValue(N, 1), Results[2]); + return true; + } + // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 117ff31e2e8b..1cd9f407bca1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAG_LEGALIZETYPES_H -#define SELECTIONDAG_LEGALIZETYPES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -122,8 +122,8 @@ public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); } /// run - This is the main entry point for the type legalizer. 
This does a @@ -240,6 +240,7 @@ private: SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_SDIV(SDNode *N); @@ -285,6 +286,8 @@ private: SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -387,6 +390,8 @@ private: SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FMINNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); @@ -450,6 +455,8 @@ private: void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -574,6 +581,7 @@ private: void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -590,6 +598,7 @@ private: SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); @@ -623,6 +632,7 @@ private: SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7e2f7b6ffb55..38829b6ac076 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -256,13 +256,13 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, 
dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -271,7 +271,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -470,7 +470,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); - const MDNode *TBAAInfo = St->getTBAAInfo(); + AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -482,14 +482,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 507e7ffb1d45..eac404c50365 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -199,12 +199,30 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); - if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), + LD->getMemoryVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - Changed = true; - return LegalizeOp(ExpandLoad(Op)); - } + case TargetLowering::Custom: + if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + Changed = true; + if (Lowered->getNumValues() != Op->getNumValues()) { + // This expanded to something other than the load. Assume the + // lowering code took care of any chain values, and just handle the + // returned value. 
+ assert(Result.getValue(1).use_empty() && + "There are still live users of the old chain!"); + return LegalizeOp(Lowered); + } else { + return TranslateLegalizeResults(Op, Lowered); + } + } + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); EVT StVT = ST->getMemoryVT(); @@ -273,6 +291,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -353,9 +373,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) { return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); } - // The rest of the time, vector "promotion" is basically just bitcasting and - // doing the operation in a different type. For example, x86 promotes - // ISD::AND on v2i32 to v1i64. + // There are currently two cases of vector promotion: + // 1) Bitcasting a vector of integers to a vector of a different integer + // type of the same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger + // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -365,14 +387,23 @@ SDValue VectorLegalizer::Promote(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + if (Op.getOperand(j) + .getValueType() + .getVectorElementType() + .isFloatingPoint()) + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + else + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); - - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + if (VT.isFloatingPoint() || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + else + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { @@ -480,7 +511,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -561,8 +592,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Op.getNode()->getValueType(0).getScalarType(), Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment(), LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); @@ -593,7 +624,7
@@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -621,7 +652,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f77c592fddb7..96b69eec3354 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -69,8 +69,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::BSWAP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -104,6 +106,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FPOW: case ISD::FREM: case ISD::FSUB: @@ -221,7 +226,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->isInvariant(), N->getOriginalAlignment(), - N->getTBAAInfo()); + N->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -232,7 +237,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. EVT DestVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(0)); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. 
+ // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -406,6 +427,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: @@ -449,11 +474,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } -/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs -/// to be scalarized, it must be <1 x ty>. Extend the element instead. +/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be +/// scalarized, it must be <1 x ty>. Do the operation on the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && - "Unexected vector type!"); + "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(), Elt); @@ -507,12 +532,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getTBAAInfo()); + N->getAlignment(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment(), N->getTBAAInfo()); + N->getOriginalAlignment(), N->getAAInfo()); } /// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs @@ -572,6 +597,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -625,6 +653,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -866,6 +896,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, return; } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + // Spill the vector to the stack. 
EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); @@ -921,14 +955,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment, TBAAInfo); + isInvariant, Alignment, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -936,7 +970,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -948,6 +982,64 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO); + + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
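Element-wise, the masked load being split here behaves as sketched below, and the split is just two half-length masked loads whose mask, pass-through, and pointer are divided the same way; a plain C++ reference model, not the DAG API (the ReplaceValueWith call that follows then rewires the old chain):

    #include <cstddef>

    // Reference semantics of a masked load: active lanes read memory,
    // inactive lanes take the Src0 pass-through value.
    template <typename T>
    void maskedLoad(T *Out, const T *Mem, const bool *Mask, const T *Src0,
                    size_t N) {
      for (size_t I = 0; I != N; ++I)
        Out[I] = Mask[I] ? Mem[I] : Src0[I];
    }

    // The Lo/Hi split: the Hi half starts half a vector further along,
    // mirroring the IncrementSize pointer bump above.
    template <typename T>
    void maskedLoadSplit(T *Out, const T *Mem, const bool *Mask,
                         const T *Src0, size_t N) {
      size_t Half = N / 2;
      maskedLoad(Out, Mem, Mask, Src0, Half);
      maskedLoad(Out + Half, Mem + Half, Mask + Half, Src0 + Half, N - Half);
    }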
+ ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1203,6 +1295,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; @@ -1347,6 +1442,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { Idx.getValueType())), 0); } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + // Store the vector to the stack. EVT EltVT = VecVT.getVectorElementType(); SDLoc dl(N); @@ -1357,7 +1456,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, 0); + MachinePointerInfo(), EltVT, false, false, false, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getData(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO); + + + // Build a factor node to remember that this store is independent of the + // other one. 
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1372,7 +1521,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1383,10 +1532,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, TBAAInfo); + LoMemVT, isVol, isNT, Alignment, AAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1395,11 +1544,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, TBAAInfo); + HiMemVT, isVol, isNT, Alignment, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1564,6 +1713,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::MLOAD: + Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; case ISD::ADD: case ISD::AND: @@ -1573,6 +1725,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::SUB: case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: Res = WidenVecRes_Binary(N); break; @@ -2252,6 +2406,48 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { return Result; } +SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue Src0 = GetWidenedVector(N->getSrc0()); + SDLoc dl(N); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(WidenVT); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + + // Rebuild memory operand because MemoryVT was changed + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOLoad, WidenVT.getStoreSize(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); + + SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, Src0, MMO); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
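Zero-filling the widened mask in WidenVecRes_MLOAD above, rather than leaving the new lanes undefined, matters for correctness: a stray set bit in a padding lane would let the widened load touch memory the original vector never accessed. Schematically, in plain C++ rather than the CONCAT_VECTORS node the code builds:

    #include <cstddef>
    #include <vector>

    // Widen an N-lane mask by appending false lanes, so the extra lanes of
    // the widened masked load stay inactive.
    std::vector<bool> widenMask(const std::vector<bool> &Mask, size_t WideN) {
      std::vector<bool> Wide(WideN, false);
      for (size_t I = 0; I != Mask.size(); ++I)
        Wide[I] = Mask[I];
      return Wide;
    }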
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), @@ -2735,7 +2931,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2746,7 +2942,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Align, - TBAAInfo); + AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2791,7 +2987,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -2807,7 +3003,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); } @@ -2887,7 +3083,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2899,7 +3096,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); + LdEltVT, isVolatile, isNonTemporal, isInvariant, + Align, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { @@ -2909,7 +3107,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2932,7 +3131,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2959,7 +3158,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, 
BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -2979,7 +3178,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -3001,7 +3200,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -3025,7 +3224,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, - TBAAInfo)); + AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -3036,7 +3235,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 624003f5070e..db38b76cf93a 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -40,32 +41,29 @@ static cl::opt<signed> RegPressureThreshold( "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + TRI = STI.getRegisterInfo(); + TLI = IS->TLI; + TII = STI.getInstrInfo(); + ResourcesModel = TII->CreateTargetScheduleState(STI); + // This hard requirement could be relaxed, but for now + // do not let it proceed.
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); + I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); -ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : - Picker(this), - InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData()) -{ - TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo(); - TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo(); - TLI = IS->getTargetLowering(); - - const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); - // This hard requirement could be relaxed, but for now - // do not let it procede. - assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); - - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); - - ParallelLiveRanges = 0; - HorizontalVerticalBalance = 0; + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; } unsigned @@ -319,7 +317,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { + if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { ResourcesModel->clearResources(); Packet.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ee5429283112..bce69d79ab12 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H -#define LLVM_CODEGEN_SDNODEDBGVALUE_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" @@ -44,7 +44,8 @@ private: const Value *Const; // valid for constants unsigned FrameIx; // valid for stack objects } u; - MDNode *mdPtr; + MDNode *Var; + MDNode *Expr; bool IsIndirect; uint64_t Offset; DebugLoc DL; @@ -52,69 +53,72 @@ private: bool Invalid; public: // Constructor for non-constants. - SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, - bool indir, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), IsIndirect(indir), - Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, + uint64_t off, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; } // Constructor for constants. 
- SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, - unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, + DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. - SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } // Returns the kind. - DbgValueKind getKind() { return kind; } + DbgValueKind getKind() const { return kind; } - // Returns the MDNode pointer. - MDNode *getMDPtr() { return mdPtr; } + // Returns the MDNode pointer for the variable. + MDNode *getVariable() const { return Var; } + + // Returns the MDNode pointer for the expression. + MDNode *getExpression() const { return Expr; } // Returns the SDNode* for a register ref - SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } // Returns the ResNo for a register ref - unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } // Returns the Value* for a constant - const Value *getConst() { assert (kind==CONST); return u.Const; } + const Value *getConst() const { assert (kind==CONST); return u.Const; } // Returns the FrameIx for a stack object - unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } // Returns whether this is an indirect value. - bool isIndirect() { return IsIndirect; } + bool isIndirect() const { return IsIndirect; } // Returns the offset. - uint64_t getOffset() { return Offset; } + uint64_t getOffset() const { return Offset; } // Returns the DebugLoc. - DebugLoc getDebugLoc() { return DL; } + DebugLoc getDebugLoc() const { return DL; } // Returns the SDNodeOrder. This is the order of the preceding node in the // input. - unsigned getOrder() { return Order; } + unsigned getOrder() const { return Order; } // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" // property. A SDDbgValue is invalid if the SDNode that produces the value is // deleted. 
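Splitting the old mdPtr into Var and Expr mirrors the contemporaneous IR change that moved complex address expressions out of the variable descriptor and into a separate metadata argument of the debug intrinsics. A minimal sketch of the resulting shape, simplified from the SDDbgValue constructors above:

struct MDNode;   // opaque IR metadata node

// A debug value conceptually carries two descriptors:
//   - which source variable it describes      -> getVariable()
//   - how to derive it from the value (DW_OP) -> getExpression()
class DbgValueSketch {
  MDNode *Var;    // variable metadata
  MDNode *Expr;   // complex expression metadata
public:
  DbgValueSketch(MDNode *V, MDNode *E) : Var(V), Expr(E) {}
  MDNode *getVariable() const { return Var; }
  MDNode *getExpression() const { return Expr; }
};

Keeping the two nodes apart lets clients query either one without re-parsing, and the const-qualified getters in the patch let const clients do so as well.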
void setIsInvalidated() { Invalid = true; } - bool isInvalidated() { return Invalid; } + bool isInvalidated() const { return Invalid; } }; } // end llvm namespace diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 4d8c2c78bce6..61a3fd728711 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -221,7 +221,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -229,7 +229,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -431,17 +431,23 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 
+ NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -454,7 +460,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { - if (RegAdded.insert(*AI)) { + if (RegAdded.insert(*AI).second) { LRegs.push_back(*AI); Added = true; } @@ -572,7 +578,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index dedca41c3aab..8b54e6568b9e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -166,12 +166,11 @@ public: NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits, nullptr) { - const TargetMachine &tm = mf.getTarget(); + const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGRRList() { @@ -946,7 +945,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -954,7 +953,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -1189,17 +1188,23 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? 
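Several hunks in these schedulers also replace EVT with MVT via getSimpleValueType. The distinction: MVT is a plain enum wrapper that every legal machine type fits into, while EVT can additionally wrap arbitrary extended IR types; after type legalization the schedulers may assume simple types, and getSimpleValueType asserts exactly that. A loose standalone analogy, not LLVM's actual classes:

#include <cassert>
#include <cstdio>

// Loose analogy to the MVT/EVT split; illustrative only.
enum class SimpleTy { i32, f32, Other, Invalid };

struct MVTLike {                      // "simple" type: just an enum value
  SimpleTy Ty = SimpleTy::Invalid;
};

struct EVTLike {                      // may wrap a type with no enum entry
  SimpleTy Ty = SimpleTy::Invalid;
  const void *ExtendedType = nullptr; // non-null only for non-simple types
  bool isSimple() const { return ExtendedType == nullptr; }
  MVTLike getSimpleVT() const {       // analogous to EVT::getSimpleVT()
    assert(isSimple() && "Expected a simple value type!");
    return {Ty};
  }
};

int main() {
  EVTLike E{SimpleTy::i32, nullptr};
  MVTLike M = E.getSimpleVT();        // safe once legalization has run
  std::printf("%d\n", static_cast<int>(M.Ty));
}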
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. + NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -1218,7 +1223,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(*AliasI)) { + if (RegAdded.insert(*AliasI).second) { LRegs.push_back(*AliasI); } } @@ -1235,7 +1240,7 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, if (!LiveRegDefs[i]) continue; if (LiveRegDefs[i] == SU) continue; if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; - if (RegAdded.insert(i)) + if (RegAdded.insert(i).second) LRegs.push_back(i); } } @@ -1310,7 +1315,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { SDNode *Gen = LiveRegGens[CallResource]->getNode(); while (SDNode *Glued = Gen->getGluedNode()) Gen = Glued; - if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + if (!IsChainDependent(Gen, Node, 0, TII) && + RegAdded.insert(CallResource).second) LRegs.push_back(CallResource); } } @@ -1439,7 +1445,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -1930,8 +1936,8 @@ void RegReductionPQBase::dumpRegPressure() const { unsigned Id = RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); + DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); } #endif } @@ -2754,7 +2760,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs && !SURegMask) continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -2977,9 +2983,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { llvm::ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = 
IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); @@ -2991,9 +2997,9 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); @@ -3005,10 +3011,10 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); @@ -3021,10 +3027,10 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index de910b7c861b..f2b18fc84bf5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -38,17 +37,17 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); -// This allows latency based scheduler to notice high latency instructions -// without a target itinerary. The choise if number here has more to do with -// balancing scheduler heursitics than with the actual machine latency. +// This allows the latency-based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. 
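The factory-function rewrites above follow one pattern applied throughout this commit: anything previously fetched from the TargetMachine is now fetched from the MachineFunction's subtarget, the prerequisite for per-function CPU/feature attributes. Restating the pattern in isolation, lightly condensed from the hunks above:

// Before: module-wide accessors on the TargetMachine.
//   const TargetInstrInfo *TII = IS->TM.getInstrInfo();
//   const TargetRegisterInfo *TRI = IS->TM.getRegisterInfo();
// After: per-function lookup through the subtarget.
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
const TargetInstrInfo *TII = STI.getInstrInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const TargetLowering *TLI = IS->TLI;   // now cached on SelectionDAGISel

Scheduler behaviour is unchanged here; only the lookup path moves, so two functions in one module can eventually observe different subtargets.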
static cl::opt<int> HighLatencyCycles( "sched-high-latency-cycles", cl::Hidden, cl::init(10), cl::desc("Roughly estimate the number of cycles that 'long latency'" "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), - InstrItins(mf.getTarget().getInstrItineraryData()) {} + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), + InstrItins(mf.getSubtarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -120,15 +119,20 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned ResNo = User->getOperand(2).getResNo(); - if (Def->isMachineOpcode()) { + if (Def->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) { + PhysReg = Reg; + } else if (Def->isMachineOpcode()) { const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) PhysReg = Reg; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); - Cost = RC->getCopyCost(); - } + } + + if (PhysReg != 0) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); + Cost = RC->getCopyCost(); } } @@ -136,7 +140,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, SmallVectorImpl<EVT> &VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 4> Ops; + SmallVector<SDValue, 8> Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); @@ -226,7 +230,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); I != E && UseCount < 100; ++I, ++UseCount) { SDNode *User = *I; - if (User == Node || !Visited.insert(User)) + if (User == Node || !Visited.insert(User).second) continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || @@ -339,7 +343,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) - if (Visited.insert(NI->getOperand(i).getNode())) + if (Visited.insert(NI->getOperand(i).getNode()).second) Worklist.push_back(NI->getOperand(i).getNode()); if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -425,7 +429,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); @@ -547,6 +551,14 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { NodeNumDefs = 0; return; } + if (POpc == TargetOpcode::PATCHPOINT && + Node->getValueType(0) == MVT::Other) { + // PATCHPOINT is defined to have one result, but it might really have none + // if we're not using CallingConv::AnyReg. Don't mistake the chain for a + // real definition. + NodeNumDefs = 0; + return; + } unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); // Some instructions define regs that are not represented in the selection DAG // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. 
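The Visited.insert(X) to Visited.insert(X).second edits in this file (and in ProcessSourceNode just below) track an API change: SmallPtrSet/SmallSet::insert now returns a std::set-style pair<iterator, bool> rather than a bare bool. The idiom, shown standalone with std::set, which is assumed equivalent for this purpose:

#include <cstdio>
#include <initializer_list>
#include <set>

int main() {
  std::set<unsigned> Visited;
  for (unsigned Node : {3u, 5u, 3u}) {
    // .second is true only when the element was newly inserted,
    // i.e. this is the first time we have seen it.
    if (Visited.insert(Node).second)
      std::printf("visiting %u\n", Node);        // prints for 3 and 5
    else
      std::printf("skipping duplicate %u\n", Node);
  }
}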
@@ -733,7 +745,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, SmallSet<unsigned, 8> &Seen) { unsigned Order = N->getIROrder(); - if (!Order || !Seen.insert(Order)) { + if (!Order || !Seen.insert(Order).second) { // Process any valid SDDbgValues even if node does not have any order // assigned. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 39ebadf3011c..2cd1f4b9bd47 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SCHEDULEDAGSDNODES_H -#define SCHEDULEDAGSDNODES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 4589b0c35dc3..418b58eda394 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -71,10 +72,8 @@ public: AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { - - const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + const TargetSubtargetInfo &STI = mf.getSubtarget(); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGVLIW() { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a5555364c82a..c819516eca04 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -46,6 +46,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> @@ -95,7 +96,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -143,7 +144,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -233,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } -ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: - return ISD::ANY_EXTEND; + return IsFP ? 
ISD::FP_EXTEND : ISD::ANY_EXTEND; case ISD::SEXTLOAD: return ISD::SIGN_EXTEND; case ISD::ZEXTLOAD: @@ -686,6 +687,15 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { DeallocateNode(N); } +void SDDbgInfo::erase(const SDNode *Node) { + DbgValMapType::iterator I = DbgValMap.find(Node); + if (I == DbgValMap.end()) + return; + for (auto &Val: I->second) + Val->setIsInvalidated(); + DbgValMap.erase(I); +} + void SelectionDAG::DeallocateNode(SDNode *N) { if (N->OperandsNeedDelete) delete[] N->OperandList; @@ -696,10 +706,60 @@ void SelectionDAG::DeallocateNode(SDNode *N) { NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. - ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); - for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) - DbgVals[i]->setIsInvalidated(); + // If any of the SDDbgValue nodes refer to this SDNode, invalidate + // them and forget about that node. + DbgInfo->erase(N); +} + +#ifndef NDEBUG +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} +#endif // NDEBUG + +/// \brief Insert a newly allocated node into the DAG. +/// +/// Handles insertion into the all nodes list and CSE map, as well as +/// verification and other common operations when a new node is allocated. +void SelectionDAG::InsertNode(SDNode *N) { + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -839,83 +899,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, return Node; } -#ifndef NDEBUG -/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. 
-static void VerifyNodeCommon(SDNode *N) { - switch (N->getOpcode()) { - default: - break; - case ISD::BUILD_PAIR: { - EVT VT = N->getValueType(0); - assert(N->getNumValues() == 1 && "Too many results!"); - assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && - "Wrong return type!"); - assert(N->getNumOperands() == 2 && "Wrong number of operands!"); - assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && - "Mismatched operand types!"); - assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && - "Wrong operand type!"); - assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && - "Wrong return type size"); - break; - } - case ISD::BUILD_VECTOR: { - assert(N->getNumValues() == 1 && "Too many results!"); - assert(N->getValueType(0).isVector() && "Wrong return type!"); - assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && - "Wrong number of operands!"); - EVT EltVT = N->getValueType(0).getVectorElementType(); - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { - assert((I->getValueType() == EltVT || - (EltVT.isInteger() && I->getValueType().isInteger() && - EltVT.bitsLE(I->getValueType()))) && - "Wrong operand type!"); - assert(I->getValueType() == N->getOperand(0).getValueType() && - "Operands must all have the same type"); - } - break; - } - } -} - -/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. -static void VerifySDNode(SDNode *N) { - // The SDNode allocators cannot be used to allocate nodes with fields that are - // not present in an SDNode! - assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); - assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); - assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); - assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); - assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); - assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); - assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); - assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); - assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); - assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); - assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); - assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); - assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); - assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); - assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); - assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); - assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); - assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); - assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); - - VerifyNodeCommon(N); -} - -/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is -/// invalid. -static void VerifyMachineNode(SDNode *N) { - // The MachineNode allocators cannot be used to allocate nodes with fields - // that are not present in a MachineNode! - // Currently there are no such nodes. - - VerifyNodeCommon(N); -} -#endif // NDEBUG - /// getEVTAlignment - Compute the default alignment value for the /// given type. 
/// @@ -924,22 +907,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); + return TLI->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(nullptr) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf) { MF = &mf; - TLI = tli; + TLI = getSubtarget().getTargetLowering(); + TSI = getSubtarget().getSelectionDAGInfo(); Context = &mf.getFunction()->getContext(); } @@ -1108,8 +1092,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; - const TargetLowering *TLI = TM.getTargetLowering(); - // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). @@ -1185,7 +1167,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, if (!N) { N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1198,7 +1180,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); + return getConstant(Val, TLI->getPointerTy(), isTarget); } @@ -1227,7 +1209,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (!N) { N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1263,7 +1245,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. 
unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); @@ -1290,7 +1271,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1305,7 +1286,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1325,7 +1306,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1336,8 +1317,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1352,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1364,8 +1344,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1380,7 +1359,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1398,7 +1377,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1412,7 +1391,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1426,7 +1405,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { if (N) return SDValue(N, 0); N = new (NodeAllocator) VTSDNode(VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1434,7 +1413,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1445,7 +1424,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetFlags)]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1456,7 +1435,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { if (!CondCodeNodes[Cond]) { CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; - AllNodes.push_back(N); + InsertNode(N); } return SDValue(CondCodeNodes[Cond], 0); @@ -1594,7 +1573,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, dl.getDebugLoc(), N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1640,7 +1619,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, dl.getDebugLoc(), Ops, Code); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1654,7 +1633,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1668,7 +1647,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1684,7 +1663,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1707,7 +1686,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1725,7 +1704,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { SDNode *N = new (NodeAllocator) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); - 
AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1741,7 +1720,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1762,7 +1741,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, dl.getDebugLoc(), VT, Ptr, SrcAS, DestAS); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1770,7 +1749,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); + EVT ShTy = TLI->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1783,7 +1762,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); @@ -1798,7 +1776,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1817,7 +1794,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( @@ -1906,7 +1882,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, // Ensure that the constant occurs on the RHS. ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); MVT CompVT = N1.getValueType().getSimpleVT(); - if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) return SDValue(); return getSetCC(dl, VT, N2, N1, SwappedCond); @@ -1942,7 +1918,6 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// them in the KnownZero/KnownOne bitsets. void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { - const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -2378,7 +2353,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. 
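The long run of AllNodes.push_back(N) to InsertNode(N) substitutions in these hunks all funnel into the helper introduced earlier in this file's diff. Its body, repeated from above for reference, is small, and centralizing it means no new node-creation path can forget the debug-build verification:

void SelectionDAG::InsertNode(SDNode *N) {
  AllNodes.push_back(N);
#ifndef NDEBUG
  VerifySDNode(N);   // structural sanity checks, debug builds only
#endif
}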
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2676,10 +2650,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -2712,6 +2683,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(apf, VT); } case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2774,7 +2747,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FP_TO_UINT: { integerPart x[2]; bool ignored; - assert(integerPartWidth >= 64); + static_assert(integerPartWidth >= 64, "APFloat parts too small!"); // FIXME need to be more flexible about rounding mode. APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), Opcode==ISD::FP_TO_SINT, @@ -2785,7 +2758,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstant(api, VT); } case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), VT); @@ -2977,10 +2952,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, DL.getDebugLoc(), VTs, Operand); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3411,8 +3383,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Perform trivial constant folding. - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); - if (SV.getNode()) return SV; + if (SDValue SV = + FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + return SV; // Canonicalize constant to RHS if commutative. if (N1C && !N2C && isCommutativeBinOp(Opcode)) { @@ -3421,6 +3394,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Constant fold FP operations. 
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions(); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); if (N1CFP) { @@ -3434,28 +3408,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, switch (Opcode) { case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) + if (!HasFPExceptions || s != APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FCOPYSIGN: V1.copySign(V2); @@ -3572,10 +3550,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3679,10 +3654,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, DL.getDebugLoc(), VTs, N1, N2, N3); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3848,7 +3820,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT == MVT::Other) { unsigned AS = 0; if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || - TLI.allowsUnalignedMemoryAccesses(VT, AS)) { + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3908,7 +3880,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -3972,7 +3944,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, // Don't promote to an alignment that would require dynamic stack // realignment. 
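The HasFPExceptions guard above relaxes constant folding: when the target reports no floating-point exception support, an APFloat status of opInvalidOp (or opDivByZero for FDIV/FREM) no longer blocks the fold, since there is no runtime exception state worth preserving. A standalone illustration of what folding would otherwise erase, in host C++ and assuming the platform implements <cfenv>:

#include <cfenv>
#include <cstdio>

// 0.0 / 0.0 produces a NaN *and* raises FE_INVALID at run time. Folding the
// division to a NaN constant keeps the value but loses the raised flag, which
// only matters on targets that model FP exceptions.
int main() {
  std::feclearexcept(FE_ALL_EXCEPT);
  volatile double z = 0.0;   // volatile: keep the division at run time
  double r = z / z;
  std::printf("result=%f FE_INVALID=%d\n", r,
              std::fetestexcept(FE_INVALID) != 0);
}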
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) @@ -4028,7 +4000,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, - MinAlign(SrcAlign, SrcOff)); + false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, @@ -4248,9 +4220,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, - DstPtrInfo, SrcPtrInfo); + TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -4269,8 +4240,6 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4316,17 +4285,14 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo, SrcPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4371,31 +4337,22 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, + Size, Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; // Emit a library call. - const TargetLowering *TLI = TM.getTargetLowering(); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); - // Extend or truncate the argument to be an i32 value for the call. 
- if (Src.getValueType().bitsGT(MVT::i32)) - Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); - else - Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); Entry.Node = Src; - Entry.Ty = Type::getInt32Ty(*getContext()); - Entry.isSExt = true; + Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); Args.push_back(Entry); Entry.Node = Size; Entry.Ty = IntPtrTy; - Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in SDLoc @@ -4442,7 +4399,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SuccessOrdering, FailureOrdering, SynchScope); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4587,7 +4544,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem) { + bool ReadMem, bool WriteMem, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -4599,8 +4556,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, Flags |= MachineMemOperand::MOLoad; if (Vol) Flags |= MachineMemOperand::MOVolatile; + if (!Size) + Size = MemVT.getStoreSize(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -4639,7 +4598,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, dl.getDebugLoc(), VTList, Ops, MemVT, MMO); } - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4683,7 +4642,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const MDNode *TBAAInfo, + unsigned Alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4706,7 +4665,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4755,7 +4714,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4764,12 +4723,12 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); } SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, @@ -4784,11 +4743,12 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const AAMDNodes &AAInfo) { SDValue Undef = 
getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, - TBAAInfo); + PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); } @@ -4815,7 +4775,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + unsigned Alignment, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4834,7 +4794,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, Val.getValueType().getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4862,7 +4822,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4870,7 +4830,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, - const MDNode *TBAAInfo) { + const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4888,7 +4848,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4931,7 +4891,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4958,7 +4918,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, + MachineMemOperand *MMO) { + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, VTs, + VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = 
getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, + VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); return SDValue(N, 0); } @@ -5037,10 +5051,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, VTs, Ops); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -5120,15 +5131,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, VTList, Ops); } } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, ArrayRef<SDValue>()); + return getNode(Opcode, DL, VTList, None); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, @@ -5510,6 +5518,10 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// node, and because it doesn't require CSE recalculation for any of /// the node's users. /// +/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. +/// As a consequence it isn't appropriate to use from within the DAG combiner or +/// the legalizer which maintain worklists that would need to be updated when +/// deleting things. SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef<SDValue> Ops) { unsigned NumOps = Ops.size(); @@ -5576,10 +5588,9 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // new operands. if (!DeadNodeSet.empty()) { SmallVector<SDNode *, 16> DeadNodes; - for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), - E = DeadNodeSet.end(); I != E; ++I) - if ((*I)->use_empty()) - DeadNodes.push_back(*I); + for (SDNode *N : DeadNodeSet) + if (N->use_empty()) + DeadNodes.push_back(N); RemoveDeadNodes(DeadNodes); } @@ -5748,10 +5759,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, if (DoCSE) CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifyMachineNode(N); -#endif + InsertNode(N); return N; } @@ -5797,26 +5805,24 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// getDbgValue - Creates a SDDbgValue node. 
/// /// SDNode -SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); +SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, + unsigned R, bool IsIndirect, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant -SDDbgValue * -SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, - uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, + const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex -SDDbgValue * -SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, + unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6205,9 +6211,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { - DbgInfo->add(DB, SD, isParameter); - if (SD) + if (SD) { + assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); SD->setHasDebugValue(true); + } + DbgInfo->add(DB, SD, isParameter); } /// TransferDbgValues - Transfer SDDbgValues. @@ -6222,10 +6230,10 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { - SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), - Dbg->isIndirect(), - Dbg->getOffset(), Dbg->getDebugLoc(), - Dbg->getOrder()); + SDDbgValue *Clone = + getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, + To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), + Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); } } @@ -6263,7 +6271,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + // We check here that the size of the memory operand fits within the size of + // the MMO. This is because the MMO might indicate only a possible address + // range instead of specifying the affected memory addresses precisely. + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, @@ -6273,7 +6284,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. 
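Nearly every builder touched in this file, including the new getMaskedLoad/getMaskedStore above, follows the same CSE discipline: fold the opcode, operands, and memory flags into a FoldingSetNodeID, probe the CSE map with FindNodeOrInsertPos, and only allocate and InsertNode on a miss. A minimal standalone model of that discipline, using a std::map in place of LLVM's FoldingSet (Node, Key, and getOrCreate are illustrative names, not LLVM API):

    #include <map>
    #include <memory>
    #include <utility>
    #include <vector>
    #include <cstdio>

    // Toy stand-in for an SDNode: an opcode plus operand node pointers.
    struct Node {
      unsigned Opcode;
      std::vector<Node *> Ops;
    };

    class DAG {
      // Key mirrors what AddNodeIDNode folds in: opcode + operand identity.
      using Key = std::pair<unsigned, std::vector<Node *>>;
      std::map<Key, Node *> CSEMap;                // stand-in for the FoldingSet
      std::vector<std::unique_ptr<Node>> AllNodes; // stand-in for InsertNode()

    public:
      // Return an existing structurally identical node, or create one.
      Node *getOrCreate(unsigned Opcode, std::vector<Node *> Ops) {
        Key K{Opcode, Ops};
        auto It = CSEMap.find(K);
        if (It != CSEMap.end())
          return It->second;                       // CSE hit: reuse the node
        AllNodes.push_back(std::make_unique<Node>(Node{Opcode, std::move(Ops)}));
        Node *N = AllNodes.back().get();
        CSEMap.emplace(std::move(K), N);
        return N;
      }
    };

    int main() {
      DAG G;
      Node *A = G.getOrCreate(1, {});
      Node *Add1 = G.getOrCreate(2, {A, A});
      Node *Add2 = G.getOrCreate(2, {A, A});
      std::printf("uniqued: %s\n", Add1 == Add2 ? "yes" : "no"); // prints "yes"
    }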
@@ -6417,7 +6428,7 @@ bool SDNode::hasPredecessor(const SDNode *N) const { bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, + SmallPtrSetImpl<const SDNode *> &Visited, SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); @@ -6433,7 +6444,7 @@ SDNode::hasPredecessorHelper(const SDNode *N, const SDNode *M = Worklist.pop_back_val(); for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { SDNode *Op = M->getOperand(i).getNode(); - if (Visited.insert(Op)) + if (Visited.insert(Op).second) Worklist.push_back(Op); if (Op == N) return true; @@ -6473,7 +6484,6 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. - const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, @@ -6544,16 +6554,29 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) - return true; - + // Handle X + C. + if (isBaseWithConstantOffset(Loc)) { + int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc) { + // If the base location is a simple address with no offset itself, then + // the second load's first add operand should be the base address. + if (LocOffset == Dist * (int)Bytes) + return true; + } else if (isBaseWithConstantOffset(BaseLoc)) { + // The base location itself has an offset, so subtract that value from the + // second load's offset before comparing to distance * size. + int64_t BOffset = + cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { + if ((LocOffset - BOffset) == Dist * (int)Bytes) + return true; + } + } + } const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; - const TargetLowering *TLI = TM.getTargetLowering(); bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -6568,7 +6591,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV; int64_t GVOffset = 0; - const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); @@ -6795,8 +6817,8 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, - SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked, + SmallPtrSetImpl<const SDNode*> &Visited, + SmallPtrSetImpl<const SDNode*> &Checked, const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) @@ -6804,7 +6826,7 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. 
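The rewritten X + C case in isConsecutiveLoad above now also covers the situation where the base location is itself base + constant. Stripped of the SDNode plumbing, the arithmetic reduces to the check below (struct Addr and isConsecutive are illustrative, not LLVM types):

    #include <cstdint>
    #include <cassert>

    // Illustrative model of an address: a base identifier plus byte offset.
    struct Addr {
      int Base;       // stands in for the non-constant operand of the add
      int64_t Offset; // 0 when the address carries no constant offset
    };

    // True if Loc sits exactly Dist elements of size Bytes after BaseLoc.
    bool isConsecutive(Addr Loc, Addr BaseLoc, int64_t Dist, int64_t Bytes) {
      if (Loc.Base != BaseLoc.Base)
        return false;
      // Subtract the base location's own offset before comparing, exactly
      // as the patch does with BOffset.
      return (Loc.Offset - BaseLoc.Offset) == Dist * Bytes;
    }

    int main() {
      // Two i32 loads at base+4 and base+8: the second is 1 element after.
      assert(isConsecutive({0, 8}, {0, 4}, /*Dist=*/1, /*Bytes=*/4));
      assert(!isConsecutive({0, 12}, {0, 4}, 1, 4));
    }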
- if (!Visited.insert(N)) { + if (!Visited.insert(N).second) { errs() << "Detected cycle in SelectionDAG\n"; dbgs() << "Offending node:\n"; N->dumprFull(DAG); dbgs() << "\n"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c07b5e6a7362..86a63eea7c2a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" @@ -46,6 +47,8 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -58,6 +61,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -563,6 +567,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. + assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; @@ -645,8 +650,10 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, const Value *V) const; + void + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, + SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -761,9 +768,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V) const { + SDValue &Chain, SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); @@ -772,8 +780,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; - ISD::NodeType ExtendKind = - TLI.isZExtFree(Val, RegisterVT)? 
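Several hunks in this diff (Visited.insert(N).second here, Done.insert(BB).second and RegSet.insert(...).second elsewhere) are mechanical fallout of SmallPtrSet::insert returning a std::pair<iterator, bool> rather than a bool. The idiom, shown with std::set, which uses the same return convention:

    #include <set>
    #include <vector>
    #include <cstdio>

    // Visit each node once; insert().second is true only on first
    // insertion, so it doubles as the "have we seen this yet?" test.
    int main() {
      std::vector<int> Worklist = {1, 2, 1, 3, 2};
      std::set<int> Visited;
      for (int N : Worklist)
        if (Visited.insert(N).second)      // false => already visited, skip
          std::printf("visiting %d\n", N); // prints 1, 2, 3 once each
    }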
ISD::ZERO_EXTEND: ISD::ANY_EXTEND; + + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) + ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); @@ -860,7 +869,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = DAG.getSubtarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -879,6 +888,7 @@ void SelectionDAGBuilder::clear() { CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; + StatepointLowering.clear(); } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -988,15 +998,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); + MDNode *Expr = DI->getExpression(); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { - SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), IsIndirect, - Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + Val)) { + SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), + IsIndirect, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1018,8 +1029,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), - InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + V->getType()); SDValue Chain = DAG.getEntryNode(); N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); @@ -1050,10 +1061,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
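The new PreferredExtendType parameter on getCopyToRegs lets a caller pin the extension kind, while the old "use zext when it is free" heuristic survives as the ANY_EXTEND default, as the hunk above shows. The selection logic, modeled standalone (ExtKind and chooseExtend are illustrative names):

    #include <cstdio>

    enum class ExtKind { Any, Zero, Sign };

    // Mirrors the patched logic: only upgrade ANY_EXTEND to ZERO_EXTEND
    // when the target reports zero-extension as free; an explicit caller
    // preference always wins.
    ExtKind chooseExtend(ExtKind Preferred, bool ZExtIsFree) {
      if (Preferred == ExtKind::Any && ZExtIsFree)
        return ExtKind::Zero;
      return Preferred;
    }

    int main() {
      // Caller forced sign-extension: the heuristic must not override it.
      std::printf("%d\n", (int)chooseExtend(ExtKind::Sign, /*ZExtIsFree=*/true));
      // No preference and zext is free: pick zero-extension.
      std::printf("%d\n", (int)chooseExtend(ExtKind::Any, true));
    }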
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI->getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); @@ -1063,7 +1074,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI->getPointerTy(AS)); + return DAG.getConstant(0, TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) @@ -1117,7 +1128,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1149,7 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI->getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1169,13 +1180,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1184,7 +1195,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1197,7 +1208,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. 
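The demoted-return path that follows implements sret demotion: when a return value is too large for registers, the caller passes a hidden pointer (the DemoteReg/RetPtr in the code) and the callee stores through it instead of populating Outs. A rough source-level picture of the transformation (big_t and the SRet parameter are illustrative):

    #include <cassert>

    // A return type too large to come back in registers on most ABIs.
    struct big_t { long v[8]; };

    // Source form: return by value.
    big_t makeBig() {
      big_t B{};
      B.v[0] = 42;
      return B;
    }

    // Demoted form the lowering produces: the caller supplies a hidden
    // pointer and the callee's return becomes stores through it.
    void makeBigDemoted(big_t *SRet) {
      big_t B{};
      B.v[0] = 42;
      *SRet = B;
    }

    int main() {
      big_t A = makeBig();
      big_t B;
      makeBigDemoted(&B);
      assert(A.v[0] == B.v[0]); // both forms produce the same value
    }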
SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1205,7 +1216,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1224,28 +1235,33 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + const Function *F = I.getParent()->getParent(); + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + LLVMContext &Context = F->getContext(); + bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg); - const Function *F = I.getParent()->getParent(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + for (unsigned j = 0; j != NumValues; ++j) { + EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(Context, VT); + MVT PartVT = TLI.getRegisterType(Context, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1253,8 +1269,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::InReg)) + if (RetInReg) Flags.setInReg(); // Propagate extension type if any @@ -1275,9 +1290,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurSDLoc(), - DAG); + Chain = DAG.getTargetLoweringInfo().LowerReturn( + Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. 
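Hoisting the attribute queries out of the per-value loop above is behavior-preserving: the extension kind depends only on the function's return attributes, not on j. What signext versus zeroext means for a small integer return, as a standalone illustration:

    #include <cstdint>
    #include <cstdio>

    // An i8 result widened to a 32-bit register, the two ways the return
    // attributes allow: signext => SIGN_EXTEND, zeroext => ZERO_EXTEND.
    int32_t widenSigned(int8_t V) { return (int32_t)V; }            // sext
    int32_t widenUnsigned(int8_t V) { return (int32_t)(uint8_t)V; } // zext

    int main() {
      std::printf("%d\n", widenSigned(-1));   // -1  (0xFFFFFFFF)
      std::printf("%d\n", widenUnsigned(-1)); // 255 (0x000000FF)
    }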
assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1400,7 +1414,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } else { - Condition = ISD::SETEQ; // silence warning. + (void)Condition; // silence warning. llvm_unreachable("Unknown compare instruction"); } @@ -1601,10 +1615,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TM.getTargetLowering()->isJumpExpensive() && - BOp->hasOneUse() && - (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), getEdgeWeight(BrMBB, Succ1MBB)); @@ -1724,7 +1737,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TM.getTargetLowering()->getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1752,10 +1765,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - const TargetLowering *TLI = TM.getTargetLowering(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1763,12 +1776,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, - DAG.getConstant(JTH.Last - JTH.First,VT), - ISD::SETUGT); + SDValue CMP = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1799,8 +1810,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { // First create the loads to the guard/stack slot for the comparison. 
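The jump-table header emitted in the hunks above leans on a classic trick: after subtracting the smallest case value, a single unsigned compare (the SETUGT against Last - First) tests both ends of the range at once, because values below First wrap around to huge unsigned numbers. A standalone model, assuming First <= Last:

    #include <cstdint>
    #include <cstdio>

    // One unsigned comparison covers "x < First || x > Last": after the
    // subtract, any x < First wraps to a value larger than Last - First.
    bool inJumpTableRange(uint32_t X, uint32_t First, uint32_t Last) {
      return (X - First) <= (Last - First); // SETUGT branches to default
    }

    int main() {
      std::printf("%d\n", inJumpTableRange(5, 3, 9));  // 1: inside
      std::printf("%d\n", inJumpTableRange(2, 3, 9));  // 0: wraps, outside
      std::printf("%d\n", inJumpTableRange(10, 3, 9)); // 0: above Last
    }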
- const TargetLowering *TLI = TM.getTargetLowering(); - EVT PtrTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1810,10 +1821,22 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); - SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), - GuardPtr, MachinePointerInfo(IRGuard, 0), - true, false, false, Align); + TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + + SDValue Guard; + + // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the + // guard value from the virtual register holding the value. Otherwise, emit a + // volatile load to retrieve the stack guard value. + unsigned GuardReg = SPD.getGuardReg(); + + if (GuardReg && TLI.useLoadStackGuardNode()) + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + PtrTy); + else + Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), StackSlotPtr, @@ -1824,11 +1847,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); - SDValue Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, VT), - ISD::SETNE); + SDValue Cmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. @@ -1853,10 +1875,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, nullptr, 0, false, - getCurSDLoc(), false, false).second; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = + TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, + nullptr, 0, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1871,16 +1893,15 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, DAG.getConstant(B.First, VT)); // Check range - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue RangeCmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), - ISD::SETUGT); + Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. 
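The stack-protector parent block above computes guard minus slot and branches to the failure path on a non-zero result; the new GuardReg path only changes where the guard value comes from (a virtual register filled by the target's stack-guard node versus a volatile load). The comparison itself, modeled standalone:

    #include <cstdint>
    #include <cstdio>

    // Returns true when the stack canary was clobbered. The lowering
    // encodes this as (Guard - Slot) != 0, i.e. a SUB feeding a SETNE.
    bool stackCheckFails(uintptr_t Guard, uintptr_t SlotValue) {
      return (Guard - SlotValue) != 0;
    }

    int main() {
      uintptr_t Canary = 0xdeadbeef;
      std::printf("%d\n", stackCheckFails(Canary, Canary));     // 0: intact
      std::printf("%d\n", stackCheckFails(Canary, Canary ^ 1)); // 1: smashed
    }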
bool UsePtrType = false; - if (!TLI->isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1892,7 +1913,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI->getPointerTy(); + VT = TLI.getPointerTy(); Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } @@ -1936,22 +1957,18 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), - ISD::SETEQ); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), - ISD::SETNE); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, @@ -1961,9 +1978,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - AndOp, DAG.getConstant(0, VT), - ISD::SETNE); + TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2001,8 +2017,17 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { if (isa<InlineAsm>(Callee)) visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { - assert(Fn->getIntrinsicID() == Intrinsic::donothing); - // Ignore invokes to @llvm.donothing: jump directly to the next BB. + switch (Fn->getIntrinsicID()) { + default: + llvm_unreachable("Cannot invoke this intrinsic"); + case Intrinsic::donothing: + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I, LandingPad); + break; + } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); @@ -2034,26 +2059,30 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. 
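visitBitTestCase, reformatted in the hunks above, picks among three equivalent tests depending on the mask's population count: a plain equality compare when one bit is set, an inequality against the lone zero bit when all but one are, and otherwise the general shift-and-mask form sketched below (bitTest is an illustrative helper, not LLVM API):

    #include <cstdint>
    #include <cassert>

    // General bit-test lowering: does the (already rebased) switch value
    // X select a bit that is set in Mask?  Cmp = ((1 << X) & Mask) != 0.
    bool bitTest(uint64_t X, uint64_t Mask) {
      return ((UINT64_C(1) << X) & Mask) != 0;
    }

    int main() {
      // Cases {1, 3, 5} sharing one destination become Mask = 0b101010.
      uint64_t Mask = (1u << 1) | (1u << 3) | (1u << 5);
      assert(bitTest(3, Mask));  // case 3 hits the shared destination
      assert(!bitTest(4, Mask)); // 4 falls through to the next test/default
    }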
- const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getExceptionPointerRegister() == 0 && - TLI->getExceptionSelectorRegister() == 0) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(*TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), @@ -2061,6 +2090,27 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + + // Get the typeid that we will dispatch on later. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). 
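A landing pad, as lowered above, materializes exactly two values, the exception pointer and the selector, copied out of fixed registers (or replaced by a null constant when no exception-pointer register is assigned). A purely structural sketch of how the selector drives clause dispatch (LandingPadValues and the type-id numbers are illustrative, not the real EH ABI):

    #include <cstdint>
    #include <cstdio>

    // The two-valued result the MERGE_VALUES node above packages up.
    struct LandingPadValues {
      void *ExceptionPtr; // from the exception-pointer register, or null
      int32_t Selector;   // type id the dispatch code compares against
    };

    // Models the clause path: each catch clause owns a type id, and the
    // selector decides whether its handler runs or unwinding resumes.
    const char *dispatch(const LandingPadValues &LP, int32_t CatchTypeID) {
      return LP.Selector == CatchTypeID ? "run handler" : "resume unwinding";
    }

    int main() {
      LandingPadValues LP{nullptr, 7};
      std::printf("%s\n", dispatch(LP, 7)); // run handler
      std::printf("%s\n", dispatch(LP, 3)); // resume unwinding
    }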
bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, @@ -2218,9 +2268,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.supportJumpTables() && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } static APInt ComputeRange(const APInt &First, const APInt &Last) { @@ -2245,8 +2294,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2327,7 +2376,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI->getJumpTableEncoding(); + unsigned JTEncoding = TLI.getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2347,7 +2396,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. @@ -2413,8 +2461,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, RSize -= J->size(); } - const TargetLowering *TLI = TM.getTargetLowering(); - if (areJTsAllowed(*TLI)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (areJTsAllowed(TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2484,8 +2532,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const Value* SV, MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { - const TargetLowering *TLI = TM.getTargetLowering(); - EVT PTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2496,19 +2544,18 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI->isOperationLegal(ISD::SHL, PTy)) + if (!TLI.isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { // Single case counts one, case range - two. numCmps += (I->Low == I->High ? 
1 : 2); } // Count unique destinations SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { Dests.insert(I->BB); if (Dests.size() > 3) // Don't bother the code below, if there are too much unique destinations @@ -2601,25 +2648,22 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, BitTestBlock BTB(lowBound, cmpRange, SV, -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, BTC); + CR.CaseBB, Default, std::move(BTC)); if (CR.CaseBB == SwitchBB) visitBitTestHeader(BTB, SwitchBB); - BitTestCases.push_back(BTB); + BitTestCases.push_back(std::move(BTB)); return true; } /// Clusterify - Transform simple list of Cases into list of CaseRange's -size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - size_t numCmps = 0; - +void SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases - for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); - i != e; ++i) { + // Start with "simple" cases. + for (SwitchInst::ConstCaseIt i : SI.cases()) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; @@ -2653,13 +2697,15 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } } - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. - ++numCmps; - } + DEBUG({ + size_t numCmps = 0; + for (auto &I : Cases) + // A range counts double, since it requires two compares. + numCmps += I.Low != I.High ? 2 : 1; - return numCmps; + dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'; + }); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2680,35 +2726,58 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = nullptr; + if (SwitchMBB + 1 != FuncInfo.MF->end()) + NextBlock = SwitchMBB + 1; + + + // Create a vector of Cases, sorted so that we can efficiently create a binary + // search tree from them. + CaseVector Cases; + Clusterify(Cases, SI); + + // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - // If there is only the default destination, branch to it if it is not the - // next basic block. Otherwise, just fall through. - if (!SI.getNumCases()) { - // Update machine-CFG edges. + if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && + !Cases.empty()) { + // Replace an unreachable default destination with the most popular case + // destination. + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } - // If this is not a fall-through branch, emit the branch. + // Set new default. + assert(MaxPop > 0); + assert(MaxBB); + Default = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the default. 
+ Cases.erase(std::remove_if(Cases.begin(), Cases.end(), + [&](const Case &C) { return C.BB == Default; }), + Cases.end()); + } + + // If there is only the default destination, go there directly. + if (Cases.empty()) { + // Update machine-CFG edges. SwitchMBB->addSuccessor(Default); - if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default))); + // If this is not a fall-through branch, emit the branch. + if (Default != NextBlock) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Default))); + } return; } - // If there are any non-default case statements, create a vector of Cases - // representing each one, and sort the vector so that we can efficiently - // create a binary search tree from them. - CaseVector Cases; - size_t numCmps = Clusterify(Cases, SI); - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'); - (void)numCmps; - - // Get the Value to be switched on and default basic blocks, which will be - // inserted into CaseBlock records, representing basic blocks in the binary - // search tree. + // Get the Value to be switched on. const Value *SV = SI.getCondition(); // Push the initial CaseRec onto the worklist @@ -2738,7 +2807,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); + handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); } } @@ -2749,7 +2818,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { SmallSet<BasicBlock*, 32> Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); - bool Inserted = Done.insert(BB); + bool Inserted = Done.insert(BB).second; if (!Inserted) continue; @@ -2806,7 +2875,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = + DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
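The unreachable-default rewrite in the hunks above picks the most frequent case destination in a single pass, then erases the cases that now match the default. A standalone model of that selection (pickPopularDefault is an illustrative name):

    #include <map>
    #include <string>
    #include <vector>
    #include <cstdio>

    // One pass, as in the patch: bump each destination's count and track
    // the running maximum, so no second scan over the map is needed.
    std::string pickPopularDefault(const std::vector<std::string> &CaseDests) {
      std::map<std::string, unsigned> Popularity;
      unsigned MaxPop = 0;
      std::string MaxBB;
      for (const auto &BB : CaseDests) {
        if (++Popularity[BB] > MaxPop) {
          MaxPop = Popularity[BB];
          MaxBB = BB;
        }
      }
      return MaxBB; // becomes the new default; its cases are then erased
    }

    int main() {
      std::printf("%s\n",
                  pickPopularDefault({"bb1", "bb2", "bb2", "bb3"}).c_str());
      // prints: bb2
    }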
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2861,8 +2931,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) - setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, - getCurSDLoc(), DAG)); + setValue(&I, DAG.getTargetLoweringInfo() + .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); else setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); @@ -2878,7 +2948,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2893,13 +2963,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2926,7 +2996,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2934,7 +3004,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2942,52 +3012,51 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT DestVT = TLI->getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), - DestVT, N, - DAG.getTargetConstant(0, TLI->getPointerTy()))); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2995,7 +3064,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -3003,13 +3072,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
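The ptrtoint/inttoptr visitors that follow reduce to width adaptation, which is exactly what DAG.getZExtOrTrunc encodes: truncate when the source is wider than the destination, zero-extend when narrower, no-op when equal. A scalar model of that helper (zextOrTrunc here is an illustrative stand-in, assuming widths up to 64 bits):

    #include <cstdint>
    #include <cstdio>

    // Adapt Value from FromBits wide to ToBits wide, zero-extending or
    // truncating as needed; both directions are masks on a uint64_t.
    uint64_t zextOrTrunc(uint64_t Value, unsigned FromBits, unsigned ToBits) {
      if (FromBits < 64)
        Value &= (UINT64_C(1) << FromBits) - 1; // keep only the source bits
      if (ToBits < 64)
        Value &= (UINT64_C(1) << ToBits) - 1;   // truncation; zext is a no-op
      return Value;
    }

    int main() {
      std::printf("%llu\n", (unsigned long long)zextOrTrunc(0x1FF, 16, 8)); // 255
      std::printf("%llu\n", (unsigned long long)zextOrTrunc(0xFF, 8, 32));  // 255
    }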
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -3031,7 +3100,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -3049,8 +3118,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InVal, InIdx)); + TLI.getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { @@ -3059,8 +3127,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InIdx)); + TLI.getValueType(I.getType()), InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -3082,8 +3149,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -3202,9 +3269,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, - Src, DAG.getConstant(StartIdx[Input], - TLI->getVectorIdxTy())); + Src = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, + DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); } // Calculate new mask. @@ -3230,7 +3297,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
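When visitShuffleVector cannot match the mask to subvector extraction, the fallback that follows scalarizes it: each mask entry selects one element from either source (or undef for -1), and the results are rebuilt into a vector. In plain C++ terms (illustrative, fixed to int elements):

    #include <vector>
    #include <cstdio>

    // Scalarized shuffle: Mask[i] in [0, N) picks from Src1, [N, 2N)
    // picks from Src2 (minus N), and -1 yields an undef element.
    std::vector<int> shuffle(const std::vector<int> &Src1,
                             const std::vector<int> &Src2,
                             const std::vector<int> &Mask) {
      const int N = (int)Src1.size();
      std::vector<int> Out;
      for (int Idx : Mask) {
        if (Idx < 0)
          Out.push_back(0);             // undef: any value is allowed
        else if (Idx < N)
          Out.push_back(Src1[Idx]);     // element from the first source
        else
          Out.push_back(Src2[Idx - N]); // element from the second source
      }
      return Out;
    }

    int main() {
      auto R = shuffle({1, 2, 3, 4}, {5, 6, 7, 8}, {0, 4, 1, 5});
      for (int V : R)
        std::printf("%d ", V);          // prints: 1 5 2 6
      std::printf("\n");
    }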
EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI->getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3262,16 +3329,22 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(*TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector<SDValue, 4> Values(NumAggValues); + // Ignore an insertvalue that produces an empty object + if (!NumAggValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. @@ -3302,9 +3375,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3353,13 +3426,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(AS); + EVT PTy = TLI.getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, @@ -3373,8 +3446,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - DL->getTypeAllocSize(Ty)); + APInt ElementSize = + APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3411,15 +3484,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - const TargetLowering *TLI = TM.getTargetLowering(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI->getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); @@ -3430,7 +3503,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. 
If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -3464,15 +3538,18 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; - bool isInvariant = I.getMetadata("invariant.load") != nullptr; + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3483,7 +3560,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3492,9 +3569,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } - const TargetLowering *TLI = TM.getTargetLowering(); if (isVolatile) - Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), @@ -3520,7 +3596,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, TBAAInfo, + isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); Values[i] = L; @@ -3549,7 +3625,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), + ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3565,9 +3642,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -3583,7 +3662,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue St = 
DAG.getStore(Root, getCurSDLoc(), SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } @@ -3592,28 +3671,70 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } -static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, - SynchronizationScope Scope, - bool Before, SDLoc dl, - SelectionDAG &DAG, - const TargetLowering &TLI) { - // Fence, if necessary - if (Before) { - if (Order == AcquireRelease || Order == SequentiallyConsistent) - Order = Release; - else if (Order == Acquire || Order == Monotonic || Order == Unordered) - return Chain; - } else { - if (Order == AcquireRelease) - Order = Acquire; - else if (Order == Release || Order == Monotonic || Order == Unordered) - return Chain; +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Value *PtrOperand = I.getArgOperand(1); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); } - SDValue Ops[3]; - Ops[0] = Chain; - Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops) + + MachineMemOperand *MMO = + DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { @@ -3624,27 +3745,16 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, - DAG, *TLI); - MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); SDValue L = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, - TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); SDValue OutChain = L.getValue(2); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3671,38 +3781,28 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue L = DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), - I.getPointerOperand(), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + I.getPointerOperand(), + /* Alignment=*/ 0, Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3713,8 +3813,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3728,19 +3828,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlignment() ? 
I.getAlignment() : DAG.getEVTAlignment(VT)); - InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); + InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), MMO, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3753,28 +3848,19 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getValueOperand()->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); - - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); + Order, Scope); DAG.setRoot(OutChain); } @@ -3799,13 +3885,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemIntrinsic TargetLowering::IntrinsicInfo Info; - const TargetLowering *TLI = TM.getTargetLowering(); - bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list.
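The visitMaskedStore/visitMaskedLoad visitors added above lower @llvm.masked.store and @llvm.masked.load straight to dedicated DAG nodes backed by a MachineMemOperand. As a rough scalar model of the per-lane semantics those nodes carry (plain C++, not the DAG API; maskedStore and maskedLoad are illustrative names):

#include <array>
#include <cstddef>

// Toy model of llvm.masked.store: lanes with a false mask bit must not
// touch memory at all (this matters for trap safety, not just values).
template <typename T, std::size_t N>
void maskedStore(const std::array<T, N> &Src0, T *Ptr,
                 const std::array<bool, N> &Mask) {
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      Ptr[I] = Src0[I];
}

// Toy model of llvm.masked.load: disabled lanes yield the pass-through
// value Src0 instead of reading from memory.
template <typename T, std::size_t N>
std::array<T, N> maskedLoad(const T *Ptr, const std::array<bool, N> &Mask,
                            const std::array<T, N> &Src0) {
  std::array<T, N> Result = Src0;
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      Result[I] = Ptr[I];
  return Result;
}

Note the operand orders the visitors decode: the store intrinsic takes (Src0, Ptr, alignment, Mask) and the load takes (Ptr, alignment, Mask, Src0).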
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3814,7 +3900,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3829,7 +3915,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, - Info.readMem, Info.writeMem); + Info.readMem, Info.writeMem, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -3848,7 +3934,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI->getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -4555,16 +4641,17 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, + MDNode *Variable, + MDNode *Expr, int64_t Offset, + bool IsIndirect, + const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); @@ -4610,14 +4697,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, return false; if (Op->isReg()) - FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, - Op->getReg(), Offset, Variable)); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(Offset) + .addMetadata(Variable) + .addMetadata(Expr)); return true; } @@ -4635,7 +4724,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. 
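The atomic visitors in the hunks above no longer special-case targets that set getInsertFencesForAtomic(): the cmpxchg/rmw/load/store nodes now receive their real orderings, the accesses are no longer downgraded to Monotonic, and the InsertFenceForAtomic helper is deleted (fence placement is now the target's own job, e.g. during atomic expansion). For reference, a sketch of the ordering policy the deleted helper encoded, reconstructed from the removed lines:

#include <optional>

enum AtomicOrdering { Unordered, Monotonic, Acquire, Release,
                      AcquireRelease, SequentiallyConsistent };

// Ordering of the ATOMIC_FENCE the old helper emitted before
// (Before == true) or after an atomic access; nullopt means no fence.
std::optional<AtomicOrdering> fenceOrdering(AtomicOrdering Order, bool Before) {
  if (Before) {
    if (Order == AcquireRelease || Order == SequentiallyConsistent)
      return Release;
    if (Order == Acquire || Order == Monotonic || Order == Unordered)
      return std::nullopt;
  } else {
    if (Order == AcquireRelease)
      return Acquire;
    if (Order == Release || Order == Monotonic || Order == Unordered)
      return std::nullopt;
  }
  return Order; // e.g. a seq_cst fence after a seq_cst access
}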
const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4649,17 +4738,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); - EVT VT = TM.getTargetLowering()->getValueType(I.getType()); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + EVT VT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); return nullptr; } @@ -4667,15 +4757,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getValue(RegValue).getOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return nullptr; } case Intrinsic::setjmp: - return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4736,6 +4827,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); DIVariable DIVar(Variable); assert((!DIVar || DIVar.isVariable()) && @@ -4771,16 +4863,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (FINode) // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), - 0, dl, SDNodeOrder); + SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); return nullptr; } } else if (AI) - SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. 
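One line above is worth a gloss: `return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];` picks between the prefixed and unprefixed libcall name by indexing into a single string literal. A standalone demonstration of the idiom:

#include <cassert>
#include <cstring>

// &"_setjmp"[0] points at "_setjmp"; &"_setjmp"[1] points at "setjmp".
// The bool-to-int conversion selects the offset, skipping the underscore.
const char *setjmpName(bool UsesUnderscore) {
  return &"_setjmp"[!UsesUnderscore];
}

int main() {
  assert(std::strcmp(setjmpName(true), "_setjmp") == 0);
  assert(std::strcmp(setjmpName(false), "setjmp") == 0);
}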
@@ -4793,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + N)) { // If variable is pinned by an alloca in dominating bb then // use StaticAllocaMap. if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { @@ -4801,7 +4894,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 0, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; @@ -4822,6 +4915,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4829,7 +4923,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, + SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at @@ -4841,10 +4936,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), IsIndirect, - Offset, dl, SDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + IsIndirect, N)) { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + IsIndirect, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty()) { @@ -4878,7 +4973,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo.
- GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); + GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4899,15 +4994,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI->getPointerTy()); + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, - TLI->getPointerTy(), - DAG.getConstant(0, TLI->getPointerTy())); + SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + DAG.getConstant(0, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4946,6 +5040,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -4997,7 +5097,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), @@ -5009,14 +5109,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI->getValueType(I.getType()); - EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + Res = + DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5024,12 +5124,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5055,7 +5155,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case 
Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -5071,23 +5171,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG)); return nullptr; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, *TLI)); + getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -5119,6 +5219,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5133,9 +5245,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI->getValueType(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI->isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -5162,7 +5274,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, - DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), + DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return nullptr; @@ -5209,7 +5321,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); + DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; @@ -5223,9 +5335,44 @@ 
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI->getPointerTy(); + EVT PtrTy = TLI.getPointerTy(); + SDValue Src, Chain = getRoot(); + const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); + + // See if Ptr is a bitcast. If it is, look through it and see if we can get + // global variable __stack_chk_guard. + if (!GV) + if (const Operator *BC = dyn_cast<Operator>(Ptr)) + if (BC->getOpcode() == Instruction::BitCast) + GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); + + if (GV && TLI.useLoadStackGuardNode()) { + // Emit a LOAD_STACK_GUARD node. + MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, + sdl, PtrTy, Chain); + MachinePointerInfo MPInfo(GV); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + unsigned Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant; + *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, + PtrTy.getSizeInBits() / 8, + DAG.getEVTAlignment(PtrTy)); + Node->setMemRefs(MemRefs, MemRefs + 1); + + // Copy the guard value to a virtual register so that it can be + // retrieved in the epilogue. + Src = SDValue(Node, 0); + const TargetRegisterClass *RC = + TLI.getRegClassFor(Src.getSimpleValueType()); + unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); + + SPDescriptor.setGuardReg(Reg); + Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); + } else { + Src = getValue(I.getArgOperand(0)); // The guard's value. + } - SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; @@ -5234,7 +5381,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. 
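The new stack-protector path above peels a bitcast off the guard pointer to recognize the __stack_chk_guard global, and on targets where TLI.useLoadStackGuardNode() holds it loads the canary through a LOAD_STACK_GUARD pseudo whose value is also copied to a virtual register (recorded via SPDescriptor.setGuardReg) for the epilogue check. A rough model of the protocol being set up (plain C++, not the SelectionDAG code):

#include <cstdint>
#include <cstdlib>

// Stand-in for the __stack_chk_guard global; the real value is chosen
// by the runtime at program start.
volatile std::uintptr_t StackChkGuard = 0xdeadbeefu;

void protectedFrame() {
  // Prologue (this hunk): load the guard and spill it into a stack slot.
  std::uintptr_t GuardSlot = StackChkGuard;

  char Buffer[32];
  (void)Buffer; // ... body; an overflow of Buffer would clobber GuardSlot ...

  // Epilogue (the StackProtector check block): recheck the slot.
  if (GuardSlot != StackChkGuard)
    std::abort(); // stands in for the __stack_chk_fail call
}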
- Res = DAG.getStore(getRoot(), sdl, Src, FIN, + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5263,8 +5410,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; + case Intrinsic::assume: case Intrinsic::var_annotation: - // Discard annotate attributes + // Discard annotate attributes and assumptions return nullptr; case Intrinsic::init_trampoline: { @@ -5285,7 +5433,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI->getPointerTy(), + TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; } @@ -5325,10 +5473,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()) .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), std::move(Args), 0); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return nullptr; } @@ -5392,11 +5540,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!LifetimeObject) continue; - int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + // First check that the Alloca is static, otherwise it won't have a + // valid frame index. + auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); + if (SI == FuncInfo.StaticAllocaMap.end()) + return nullptr; + + int FI = SI->second; SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5406,7 +5560,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return nullptr; case Intrinsic::invariant_end: // Discard region information. 
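The lifetime-marker fix above guards against a classic map pitfall: `FuncInfo.StaticAllocaMap[LifetimeObject]` on an alloca that never received a static slot would default-construct an entry and hand back frame index 0. The safe-lookup pattern, reduced to a minimal sketch with std::map standing in for DenseMap:

#include <map>
#include <optional>

// operator[] would silently insert 0 for an alloca with no static slot;
// find() lets the caller bail out instead.
std::optional<int>
frameIndexFor(const std::map<const void *, int> &StaticAllocaMap,
              const void *Alloca) {
  auto It = StaticAllocaMap.find(Alloca);
  if (It == StaticAllocaMap.end())
    return std::nullopt; // dynamic alloca: no valid frame index
  return It->second;
}

The patch's early `return nullptr` for the dynamic-alloca case corresponds to the nullopt branch here.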
@@ -5424,7 +5578,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::clear_cache: - return TLI->getClearCacheBuiltinName(); + return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; @@ -5434,41 +5588,85 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { - visitPatchpoint(I); + visitPatchpoint(&I); return nullptr; } + case Intrinsic::experimental_gc_statepoint: { + visitStatepoint(I); + return nullptr; } -} - -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { - const TargetLowering *TLI = TM.getTargetLowering(); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = nullptr; - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Args.reserve(CS.arg_size()); + case Intrinsic::experimental_gc_result_int: + case Intrinsic::experimental_gc_result_float: + case Intrinsic::experimental_gc_result_ptr: { + visitGCResult(I); + return nullptr; + } + case Intrinsic::experimental_gc_relocate: { + visitGCRelocate(I); + return nullptr; + } + case Intrinsic::instrprof_increment: + llvm_unreachable("instrprof failed to lower an increment"); - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - const Value *V = *i; + case Intrinsic::frameallocate: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Do the allocation and map it as a normal value. + // FIXME: Maybe we should add this to the alloca map so that we don't have + // to register allocate it? + uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); + MVT PtrVT = TLI.getPointerTy(0); + SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); + setValue(&I, FIVal); + + // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is + // the same on all targets. + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(Alloc); - // Skip empty types - if (V->getType()->isEmptyTy()) - continue; + return nullptr; + } - SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; Entry.Ty = V->getType(); + case Intrinsic::framerecover: { + // i8* @llvm.framerecover(i8* %fn, i8* %fp) + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = TLI.getPointerTy(0); + + // Get the symbol that defines the frame offset. + Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + + // Create a TargetExternalSymbol for the label to avoid any target lowering + // that would make this PC relative. 
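The frameallocate/framerecover pair above works by publishing the allocation's offset from the parent's frame pointer under a per-function frame-alloc symbol, which the recovering function adds back to the parent frame pointer it was handed. A toy model of that arithmetic (the symbol becomes a plain global here; purely illustrative):

#include <cassert>
#include <cstddef>

// The "frame allocation symbol": offset of the reserved slot from the
// parent's frame base, resolved at compile time in the real scheme.
static std::ptrdiff_t FrameAllocOffset = 0;

struct Frame { char Bytes[64]; };

// Parent side, modeling llvm.frameallocate: reserve a slot in our own
// frame and publish its offset.
char *frameAllocate(Frame &Parent) {
  FrameAllocOffset = 16; // wherever the slot lands in this frame layout
  return Parent.Bytes + FrameAllocOffset;
}

// Child side, modeling llvm.framerecover: parent FP plus the offset.
char *frameRecover(char *ParentFP) {
  return ParentFP + FrameAllocOffset;
}

int main() {
  Frame F{};
  *frameAllocate(F) = 42;
  assert(*frameRecover(F.Bytes) == 42); // both sides see the same slot
}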
+ StringRef Name = FrameAllocSym->getName(); + assert(Name.size() == strlen(Name.data()) && "not null terminated"); + SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); + SDValue OffsetVal = + DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + + // Add the offset to the FP. + Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + setValue(&I, Add); - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); - Args.push_back(Entry); + return nullptr; } + } +} + +std::pair<SDValue, SDValue> +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + MachineBasicBlock *LandingPad) { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = nullptr; if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be @@ -5490,24 +5688,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // this call might not return. (void)getRoot(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); - } - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) - isTailCall = false; + CLI.setChain(getRoot()); + } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); - std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); - assert((isTailCall || Result.second.getNode()) && + assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -5530,6 +5721,50 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Inform MachineModuleInfo of range. MMI.addInvoke(LandingPad, BeginLabel, EndLabel); } + + return Result; +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. 
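LowerCallTo is split above so that the EH-label bracketing and the TLI::LowerCallTo invocation live in a reusable lowerInvokable, which plain calls and (further down) patchpoints both funnel into. The shape of the refactor as a sketch, with placeholder types standing in for CallLoweringInfo and MachineBasicBlock:

#include <iostream>
#include <string>

struct CallLoweringState { std::string Callee; }; // stand-in for CallLoweringInfo
struct LandingPadBlock { std::string Label; };    // stand-in for MachineBasicBlock

// Shared tail of call and invoke lowering: bracket a potentially
// throwing call with EH labels so the try-range can be recorded, then
// perform the target call lowering itself.
void lowerInvokable(const CallLoweringState &CLI, const LandingPadBlock *LP) {
  if (LP)
    std::cout << "EH_LABEL begin\n";          // BeginLabel in the patch
  std::cout << "call " << CLI.Callee << '\n'; // TLI->LowerCallTo(CLI)
  if (LP)
    std::cout << "EH_LABEL end\n";            // EndLabel + MMI.addInvoke(...)
}

// Both entry points now funnel into the shared helper.
void lowerCall(const CallLoweringState &CLI) { lowerInvokable(CLI, nullptr); }
void lowerInvoke(const CallLoweringState &CLI, const LandingPadBlock &LP) {
  lowerInvokable(CLI, &LP);
}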
+ if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + isTailCall = false; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); + + if (Result.first.getNode()) + setValue(CS.getInstruction(), Result.first); } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -5595,7 +5830,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5620,7 +5855,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); setValue(&I, DAG.getConstant(0, CallVT)); return true; } @@ -5677,15 +5912,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) + // TODO: Check alignment of src and dest ptrs. + if (!TLI.isTypeLegal(LoadVT) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -5863,6 +6099,26 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return true; } +/// visitBinaryFloatCall - If a call instruction is a binary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a binary floating-point call. + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + I.getType() != I.getArgOperand(1)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
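The new visitBinaryFloatCall above, together with the LibFunc::fmin/fmax cases added just below, routes fmin/fmax libcalls to the FMINNUM/FMAXNUM nodes that llvm.minnum/llvm.maxnum also map to earlier in this chunk. The point of dedicated nodes is IEEE-754 minNum semantics, which a bare compare-and-select gets wrong when one operand is NaN; a reference implementation:

#include <cassert>
#include <cmath>

// IEEE-754 minNum: if exactly one operand is NaN, return the other
// operand; a plain (a < b ? a : b) would propagate the NaN instead.
double fminnum(double A, double B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

int main() {
  assert(fminnum(1.0, std::nan("")) == 1.0); // the NaN is ignored
  assert(fminnum(std::nan(""), 2.0) == 2.0);
  assert(fminnum(1.0, 2.0) == 1.0);
}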
if (isa<InlineAsm>(I.getCalledValue())) { @@ -5919,6 +6175,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FABS)) return; break; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + if (visitBinaryFloatCall(I, ISD::FMINNUM)) + return; + break; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + if (visitBinaryFloatCall(I, ISD::FMAXNUM)) + return; + break; case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: @@ -6025,7 +6293,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, - TM.getTargetLowering()->getPointerTy()); + DAG.getTargetLoweringInfo().getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // done within LowerCallTo, after more information about the call is known. @@ -6220,9 +6488,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI.ParseConstraints(CS); bool hasMemory = false; @@ -6247,10 +6515,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI->getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -6271,8 +6539,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). - getSimpleVT(); + OpVT = + OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -6283,7 +6551,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI->getConstraintType(OpInfo.Codes[j]); + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -6315,10 +6583,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6332,7 +6600,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -6360,16 +6628,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI->getPointerTy()); + TLI.getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -6387,7 +6655,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6398,7 +6666,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6406,7 +6674,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI->getPointerTy())); + TLI.getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6429,7 +6697,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI->ComputeConstraintToUse(OpInfo, SDValue()); + TLI.ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6447,7 +6715,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI->getPointerTy())); + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6469,7 +6737,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. 
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6549,7 +6817,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); @@ -6576,7 +6844,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6588,7 +6856,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); @@ -6602,20 +6870,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI->getPointerTy() && + assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6678,7 +6946,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI->getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. 
This can happen for register classes that can @@ -6743,9 +7011,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &DL = *TLI->getDataLayout(); - SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = *TLI.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); @@ -6777,18 +7045,19 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. std::pair<SDValue, SDValue> -SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, +SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool useVoidTy) { + bool UseVoidTy, + MachineBasicBlock *LandingPad, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. - ImmutableCallSite CS(&CI); for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; ArgI != ArgE; ++ArgI) { - const Value *V = CI.getOperand(ArgI); + const Value *V = CS->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); @@ -6799,14 +7068,13 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(!CI.use_empty()); + .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - const TargetLowering *TLI = TM.getTargetLowering(); - return TLI->LowerCallTo(CLI); + return lowerInvokable(CLI, LandingPad); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6826,11 +7094,11 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. 
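addStackMapLiveVars, documented above and redefined just below over an ImmutableCallSite so stackmaps and patchpoints can share it, appends each live value either as a StackMaps::ConstantOp marker plus immediate or as an operand whose register or stack location is filled in after allocation. A compact model of that encoding (the marker value and layout here are illustrative):

#include <cstdint>
#include <vector>

// Stand-in for StackMaps::ConstantOp from llvm/CodeGen/StackMaps.h.
constexpr std::uint64_t ConstantOpMarker = 0xFFFFFFFFu;

struct LiveValue {
  bool IsConstant;
  std::uint64_t ImmOrLoc; // immediate value, or an opaque location id
};

// Constants become [marker, imm] pairs; anything else is forwarded so
// that its final location is recorded after register allocation.
std::vector<std::uint64_t> encodeLiveVars(const std::vector<LiveValue> &Vals) {
  std::vector<std::uint64_t> Ops;
  for (const LiveValue &V : Vals) {
    if (V.IsConstant) {
      Ops.push_back(ConstantOpMarker);
      Ops.push_back(V.ImmOrLoc);
    } else {
      Ops.push_back(V.ImmOrLoc);
    }
  }
  return Ops;
}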
-static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, +static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); @@ -6881,7 +7149,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -6908,7 +7176,8 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. -void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { +void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, + MachineBasicBlock *LandingPad) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -6916,32 +7185,29 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // [Args...], // [live variables...]) - CallingConv::ID CC = CI.getCallingConv(); - bool isAnyRegCC = CC == CallingConv::AnyReg; - bool hasDef = !CI.getType()->isVoidTy(); - SDValue Callee = getValue(CI.getOperand(2)); // <target> + CallingConv::ID CC = CS.getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !CS->getType()->isVoidTy(); + SDValue Callee = getValue(CS->getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && + assert(CS.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. - unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); - - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + LandingPad, true); - SDNode *CallEnd = Chain.getNode(); - if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + SDNode *CallEnd = Result.second.getNode(); + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. 
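visitPatchpoint above decodes the fixed operand layout of @llvm.experimental.patchpoint: <id>, <numBytes>, <target>, <numArgs>, then the call arguments, then trailing live values. A small stand-alone parser over that layout (the struct and the flat integer encoding are illustrative, not the SDNode representation):

#include <cstddef>
#include <cstdint>
#include <vector>

struct PatchPointInfo {
  std::uint64_t ID;
  std::uint32_t NumBytes; // size of the nop shadow to reserve
  std::uint64_t Target;   // call target
  std::vector<std::uint64_t> CallArgs;
  std::vector<std::uint64_t> LiveVars;
};

// Operand layout: [id, numBytes, target, numArgs, args..., live vars...].
// Assumes Ops is well formed (at least the four meta operands present).
PatchPointInfo parsePatchPoint(const std::vector<std::uint64_t> &Ops) {
  const std::size_t NumMetaOpers = 4; // <id>, <numBytes>, <target>, <numArgs>
  PatchPointInfo PPI;
  PPI.ID = Ops[0];
  PPI.NumBytes = static_cast<std::uint32_t>(Ops[1]);
  PPI.Target = Ops[2];
  std::size_t NumArgs = static_cast<std::size_t>(Ops[3]);
  for (std::size_t I = 0; I != NumArgs; ++I)
    PPI.CallArgs.push_back(Ops[NumMetaOpers + I]);
  for (std::size_t I = NumMetaOpers + NumArgs; I != Ops.size(); ++I)
    PPI.LiveVars.push_back(Ops[I]); // everything after the args is live
  return PPI;
}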
@@ -6949,16 +7215,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); - SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); @@ -6971,8 +7237,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Adjust <numArgs> to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] - unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); - NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); + NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); // Add the calling convention @@ -6980,20 +7246,20 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. - if (isAnyRegCC) + if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + Ops.push_back(getValue(CS.getArgument(i))); // Push the arguments from the call instruction up to the register mask. - SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-2)); else Ops.push_back(*(Call->op_end()-1)); @@ -7003,15 +7269,15 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { Ops.push_back(*(Call->op_begin())); // Push the glue flag (last operand). - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-1)); SDVTList NodeTys; - if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, CI.getType(), ValueVTs); + ComputeValueVTs(TLI, CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -7026,18 +7292,18 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { getCurSDLoc(), NodeTys, Ops); // Update the NodeMap. 
- if (hasDef) { - if (isAnyRegCC) - setValue(&CI, SDValue(MN, 0)); + if (HasDef) { + if (IsAnyRegCC) + setValue(CS.getInstruction(), SDValue(MN, 0)); else - setValue(&CI, Result.first); + setValue(CS.getInstruction(), Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. - if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); @@ -7186,8 +7452,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7233,10 +7502,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); - // Only mark the end at the last register of the last value. - if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7349,10 +7614,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - const TargetLowering *TLI = TM.getTargetLowering(); - RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); + + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == + FuncInfo.PreferredExtendType.end()) + ? ISD::ANY_EXTEND + : FuncInfo.PreferredExtendType[V]; + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -7378,15 +7648,13 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const TargetLowering *TLI = getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*getTargetLowering(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. 
@@ -7451,8 +7719,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       }
       if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
         Flags.setNest();
-      if (NeedsRegBlock)
+      if (NeedsRegBlock) {
         Flags.setInConsecutiveRegs();
+        if (Value == NumValues - 1)
+          Flags.setInConsecutiveRegsLast();
+      }
       Flags.setOrigAlign(OriginalAlignment);
 
       MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7465,11 +7736,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // if it isn't the first piece, alignment must be 1
         else if (i > 0)
           MyFlags.Flags.setOrigAlign(1);
-
-        // Only mark the end at the last register of the last value.
-        if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
-          MyFlags.Flags.setInConsecutiveRegsLast();
-
         Ins.push_back(MyFlags);
       }
       PartBase += VT.getStoreSize();
@@ -7478,9 +7744,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 
   // Call the target to set up the argument values.
   SmallVector<SDValue, 8> InVals;
-  SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
-                                              F.isVarArg(), Ins,
-                                              dl, DAG, InVals);
+  SDValue NewRoot = TLI->LowerFormalArguments(
+      DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
 
   // Verify that the target's LowerFormalArguments behaved as expected.
   assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
@@ -7517,8 +7782,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     MachineRegisterInfo& RegInfo = MF.getRegInfo();
     unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
     FuncInfo->DemoteRegister = SRetReg;
-    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(),
-                                    SRetReg, ArgValue);
+    NewRoot =
+        SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
     DAG.setRoot(NewRoot);
 
     // i indexes lowered arguments. Bump it past the hidden sret argument.
@@ -7633,7 +7898,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
 
     // If this terminator has multiple identical successors (common for
     // switches), only handle each succ once.
-    if (!SuccsHandled.insert(SuccMBB)) continue;
+    if (!SuccsHandled.insert(SuccMBB).second)
+      continue;
 
     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
@@ -7676,11 +7942,11 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
       // Remember that this register needs to be added to the machine PHI node
       // as the input for this MBB.
       SmallVector<EVT, 4> ValueVTs;
-      const TargetLowering *TLI = TM.getTargetLowering();
-      ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
         EVT VT = ValueVTs[vti];
-        unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT);
+        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
         for (unsigned i = 0, e = NumRegisters; i != e; ++i)
           FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
         Reg += NumRegisters;
@@ -7697,6 +7963,7 @@ MachineBasicBlock *
 SelectionDAGBuilder::StackProtectorDescriptor::
 AddSuccessorMBB(const BasicBlock *BB,
                 MachineBasicBlock *ParentMBB,
+                bool IsLikely,
                 MachineBasicBlock *SuccMBB) {
   // If SuccBB has not been created yet, create it.
   if (!SuccMBB) {
@@ -7706,6 +7973,7 @@ AddSuccessorMBB(const BasicBlock *BB,
     MF->insert(++BBI, SuccMBB);
   }
   // Add it as a successor of ParentMBB.
-  ParentMBB->addSuccessor(SuccMBB);
+  ParentMBB->addSuccessor(
+      SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
   return SuccMBB;
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 84679f98d84e..eba98b8086b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -11,9 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef SELECTIONDAGBUILDER_H
-#define SELECTIONDAGBUILDER_H
+#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
+#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
 
+#include "StatepointLowering.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -21,6 +22,7 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
 #include <vector>
 
 namespace llvm {
@@ -114,6 +116,10 @@ public:
   /// get simple disambiguation between loads without worrying about alias
   /// analysis.
   SmallVector<SDValue, 8> PendingLoads;
+
+  /// State used while lowering a statepoint sequence (gc_statepoint,
+  /// gc_relocate, and gc_result). See StatepointLowering.h/.cpp for details.
+  StatepointLoweringState StatepointLowering;
 private:
 
   /// PendingExports - CopyToReg nodes that copy values to virtual registers
@@ -200,7 +206,7 @@ private:
     }
   };
 
-  size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+  void Clusterify(CaseVector &Cases, const SwitchInst &SI);
 
   /// CaseBlock - This structure is used to communicate between
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -276,9 +282,9 @@ private:
     BitTestBlock(APInt F, APInt R, const Value* SV,
                  unsigned Rg, MVT RgVT, bool E,
                  MachineBasicBlock* P, MachineBasicBlock* D,
-                 const BitTestInfo& C):
+                 BitTestInfo C):
       First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
-      Parent(P), Default(D), Cases(C) { }
+      Parent(P), Default(D), Cases(std::move(C)) { }
     APInt First;
     APInt Range;
     const Value *SValue;
@@ -397,7 +403,8 @@ private:
   class StackProtectorDescriptor {
   public:
     StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
-                                 FailureMBB(nullptr), Guard(nullptr) { }
+                                 FailureMBB(nullptr), Guard(nullptr),
+                                 GuardReg(0) { }
     ~StackProtectorDescriptor() { }
 
     /// Returns true if all fields of the stack protector descriptor are
@@ -415,8 +422,8 @@ private:
       assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
             "already initialized!");
       ParentMBB = MBB;
-      SuccessMBB = AddSuccessorMBB(BB, MBB);
-      FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);
+      SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+      FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
       if (!Guard)
         Guard = StackProtCheckCall.getArgOperand(0);
     }
@@ -455,6 +462,9 @@ private:
     MachineBasicBlock *getFailureMBB() { return FailureMBB; }
     const Value *getGuard() { return Guard; }
 
+    unsigned getGuardReg() const { return GuardReg; }
+    void setGuardReg(unsigned R) { GuardReg = R; }
+
   private:
     /// The basic block for which we are generating the stack protector.
     ///
@@ -477,11 +487,15 @@ private:
     /// stack protector stack slot.
     const Value *Guard;
 
+    /// The virtual register holding the stack guard value.
+    unsigned GuardReg;
+
    /// Add a successor machine basic block to ParentMBB. If the successor mbb
    /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
-    /// block will be created.
+    /// block will be created. Assign a large weight if IsLikely is true.
     MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
                                        MachineBasicBlock *ParentMBB,
+                                       bool IsLikely,
                                        MachineBasicBlock *SuccMBB = nullptr);
   };
 
@@ -604,6 +618,13 @@ public:
     N = NewN;
   }
 
+  void removeValue(const Value *V) {
+    // This is to support a hack in lowerCallFromStatepoint.
+    // It should be removed when that hack is resolved.
+    if (NodeMap.count(V))
+      NodeMap.erase(V);
+  }
+
   void setUnusedArgValue(const Value *V, SDValue NewN) {
     SDValue &N = UnusedArgNodeMap[V];
     assert(!N.getNode() && "Already set a value for this node!");
@@ -626,17 +647,24 @@ public:
   void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
                    MachineBasicBlock *LandingPad = nullptr);
 
-  std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
-                                                unsigned ArgIdx,
-                                                unsigned NumArgs,
-                                                SDValue Callee,
-                                                bool useVoidTy = false);
+  std::pair<SDValue, SDValue> lowerCallOperands(
+          ImmutableCallSite CS,
+          unsigned ArgIdx,
+          unsigned NumArgs,
+          SDValue Callee,
+          bool UseVoidTy = false,
+          MachineBasicBlock *LandingPad = nullptr,
+          bool IsPatchPoint = false);
 
   /// UpdateSplitBlock - When an MBB was split during scheduling, update the
   /// references that need to refer to the last resulting block.
   void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
 
 private:
+  std::pair<SDValue, SDValue> lowerInvokable(
+          TargetLowering::CallLoweringInfo &CLI,
+          MachineBasicBlock *LandingPad);
+
   // Terminator instructions.
   void visitRet(const ReturnInst &I);
   void visitBr(const BranchInst &I);
@@ -658,7 +686,6 @@ private:
   bool handleBTSplitSwitchCase(CaseRec& CR,
                                CaseRecVector& WorkList,
                                const Value* SV,
-                               MachineBasicBlock* Default,
                                MachineBasicBlock *SwitchBB);
   bool handleBitTestsSwitchCase(CaseRec& CR,
                                 CaseRecVector& WorkList,
@@ -686,6 +713,8 @@ public:
   void visitJumpTable(JumpTable &JT);
   void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
                             MachineBasicBlock *SwitchBB);
+  unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV,
+                                   MachineBasicBlock *LPadMBB);
 
 private:
   // These all get lowered before this pass.
@@ -743,6 +772,8 @@ private:
   void visitAlloca(const AllocaInst &I);
   void visitLoad(const LoadInst &I);
   void visitStore(const StoreInst &I);
+  void visitMaskedLoad(const CallInst &I);
+  void visitMaskedStore(const CallInst &I);
   void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
   void visitAtomicRMW(const AtomicRMWInst &I);
   void visitFence(const FenceInst &I);
@@ -755,6 +786,7 @@ private:
   bool visitStrLenCall(const CallInst &I);
   bool visitStrNLenCall(const CallInst &I);
   bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
+  bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode);
   void visitAtomicLoad(const LoadInst &I);
   void visitAtomicStore(const StoreInst &I);
@@ -767,7 +799,13 @@ private:
   void visitVAEnd(const CallInst &I);
   void visitVACopy(const CallInst &I);
   void visitStackmap(const CallInst &I);
-  void visitPatchpoint(const CallInst &I);
+  void visitPatchpoint(ImmutableCallSite CS,
+                       MachineBasicBlock *LandingPad = nullptr);
+
+  // These three are implemented in StatepointLowering.cpp
+  void visitStatepoint(const CallInst &I);
+  void visitGCRelocate(const CallInst &I);
+  void visitGCResult(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
@@ -784,7 +822,7 @@ private:
   /// EmitFuncArgumentDbgValue - If V is a function argument then create
   /// corresponding DBG_VALUE machine instruction for it now. At the end of
   /// instruction selection, they will be inserted to the entry BB.
-  bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+  bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
                                 int64_t Offset, bool IsIndirect,
                                 const SDValue &N);
 };
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a71cc6859ea0..e8577d898c2d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 std::string SDNode::getOperationName(const SelectionDAG *G) const {
@@ -36,7 +37,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
     return "<<Unknown DAG Node>>";
   if (isMachineOpcode()) {
     if (G)
-      if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+      if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo())
         if (getMachineOpcode() < TII->getNumOpcodes())
           return TII->getName(getMachineOpcode());
     return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
@@ -140,6 +141,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
 
   // Unary operators
   case ISD::FABS:    return "fabs";
+  case ISD::FMINNUM: return "fminnum";
+  case ISD::FMAXNUM: return "fmaxnum";
   case ISD::FNEG:    return "fneg";
   case ISD::FSQRT:   return "fsqrt";
   case ISD::FSIN:    return "fsin";
@@ -266,6 +269,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   // Other operators
   case ISD::LOAD:               return "load";
   case ISD::STORE:              return "store";
+  case ISD::MLOAD:              return "masked_load";
+  case ISD::MSTORE:             return "masked_store";
   case ISD::VAARG:              return "vaarg";
   case ISD::VACOPY:             return "vacopy";
   case ISD::VAEND:              return "vaend";
@@ -433,7 +438,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << LBB->getName() << " ";
     OS << (const void*)BBDN->getBasicBlock() << ">";
   } else if (const
RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr); + OS << ' ' << PrintReg(R->getReg(), + G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -565,7 +571,7 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. + if (!once.insert(N).second) // If we've been here before, return now. return; // Dump the current SDNode, but don't end the line yet. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 57e22e21c371..4f031d3ff7e7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -40,6 +41,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -181,6 +183,10 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<std::string> +FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, + cl::desc("Only display the basic block whose name " + "matches this for all view-*-dags options")); static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the first " @@ -284,8 +290,8 @@ namespace llvm { /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering *TLI = IS->getTargetLowering(); - const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); + const TargetLowering *TLI = IS->TLI; + const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || TLI->getSchedulingPreference() == Sched::Source) @@ -336,7 +342,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo(TM)), + FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -411,32 +417,32 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetLowering *TLI = TM.getTargetLowering(); - MF = &mf; - RegInfo = &MF->getRegInfo(); - AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); - GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; - - TargetSubtargetInfo &ST = - const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); - ST.resetSubtargetFeatures(MF); - TM.resetTargetOptions(MF); + // Reset the target options before resetting the optimization + // level below. + // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. CodeGenOpt::Level NewOptLevel = OptLevel; if (Fn.hasFnAttribute(Attribute::OptimizeNone)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TLI); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -454,7 +460,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. MachineBasicBlock *EntryMBB = MF->begin(); - RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) @@ -489,15 +496,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); + const MDNode *Variable = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. 
 BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
-              TII.get(TargetOpcode::DBG_VALUE),
-              IsIndirect,
-              LDI->second, Offset, Variable);
+              TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset,
+              Variable, Expr);
 
       // If this vreg is directly copied into an exported register then
       // that COPY instruction also needs a DBG_VALUE, if it is the only
@@ -516,11 +522,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       }
       if (CopyUseMI) {
         MachineInstr *NewMI =
-          BuildMI(*MF, CopyUseMI->getDebugLoc(),
-                  TII.get(TargetOpcode::DBG_VALUE),
-                  IsIndirect,
-                  CopyUseMI->getOperand(0).getReg(),
-                  Offset, Variable);
+            BuildMI(*MF, CopyUseMI->getDebugLoc(),
+                    TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+                    CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
         MachineBasicBlock::iterator Pos = CopyUseMI;
         EntryMBB->insertAfter(Pos, NewMI);
       }
@@ -534,7 +538,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
       break;
 
     for (const auto &MI : MBB) {
-      const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode());
+      const MCInstrDesc &MCID = TII->get(MI.getOpcode());
       if ((MCID.isCall() && !MCID.isReturn()) ||
           MI.isStackAligningInlineAsm()) {
         MFI->setHasCalls(true);
@@ -617,7 +621,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
     SDNode *N = Worklist.pop_back_val();
 
     // If we've already seen this node, ignore it.
-    if (!VisitedNodes.insert(N))
+    if (!VisitedNodes.insert(N).second)
       continue;
 
     // Otherwise, add all chain operands to the worklist.
@@ -652,6 +656,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   std::string BlockName;
   int BlockNumber = -1;
   (void)BlockNumber;
+  bool MatchFilterBB = false; (void)MatchFilterBB;
+#ifndef NDEBUG
+  MatchFilterBB = (!FilterDAGBasicBlockName.empty() &&
+                   FilterDAGBasicBlockName ==
+                       FuncInfo->MBB->getBasicBlock()->getName().str());
+#endif
#ifdef NDEBUG
   if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
       ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
@@ -665,7 +675,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
         << " '" << BlockName << "'\n"; CurDAG->dump());
 
-  if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+  if (ViewDAGCombine1 && MatchFilterBB)
+    CurDAG->viewGraph("dag-combine1 input for " + BlockName);
 
   // Run the DAG combiner in pre-legalize mode.
   {
@@ -678,8 +689,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 
   // Second step, hack on the DAG until it only uses operations and types that
   // the target supports.
-  if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
-                                               BlockName);
+  if (ViewLegalizeTypesDAGs && MatchFilterBB)
+    CurDAG->viewGraph("legalize-types input for " + BlockName);
 
   bool Changed;
   {
@@ -693,7 +704,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->NewNodesMustHaveLegalTypes = true;
 
     if (Changed) {
-      if (ViewDAGCombineLT)
+      if (ViewDAGCombineLT && MatchFilterBB)
         CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
 
       // Run the DAG combiner in post-type-legalize mode.
@@ -719,7 +730,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->LegalizeTypes();
   }
 
-  if (ViewDAGCombineLT)
+  if (ViewDAGCombineLT && MatchFilterBB)
     CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
 
   // Run the DAG combiner in post-type-legalize mode.
@@ -733,7 +744,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + if (ViewLegalizeDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize input for " + BlockName); { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); @@ -743,7 +755,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + if (ViewDAGCombine2 && MatchFilterBB) + CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. { @@ -757,7 +770,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + if (ViewISelDAGs && MatchFilterBB) + CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. @@ -769,7 +783,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + if (ViewSchedDAGs && MatchFilterBB) + CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); @@ -779,7 +794,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Scheduler->Run(CurDAG, FuncInfo->MBB); } - if (ViewSUnitDAGs) Scheduler->viewGraph(); + if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. @@ -894,6 +909,8 @@ void SelectionDAGISel::DoInstructionSelection() { void SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -901,13 +918,70 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + if (TM.getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::MSVC) { + // Make virtual registers and a series of labels that fill in values for the + // clauses. + auto &RI = MF->getRegInfo(); + FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + + // Get all invoke BBs that will unwind into the clause BBs. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + + // Emit separate machine basic blocks with separate labels for each clause + // before the main landing pad block. 
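+    // Illustrative sketch of the control flow built below (not emitted
+    // code): each invoke that used to unwind directly to this landing pad
+    // now branches to a per-clause block, and each clause block defines a
+    // typeid vreg that feeds a PHI in the main landing pad:
+    //
+    //   invoke.bb --> clause0.bb (typeid0) --+
+    //   invoke.bb --> clause1.bb (typeid1) --+--> lpad.bb (selector = PHI)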
+    const BasicBlock *LLVMBB = MBB->getBasicBlock();
+    const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst();
+    MachineInstrBuilder SelectorPHI = BuildMI(
+        *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI),
+        FuncInfo->ExceptionSelectorVirtReg);
+    for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) {
+      MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB);
+      MF->insert(MBB, ClauseBB);
+
+      // Add the edge from the invoke to the clause.
+      for (MachineBasicBlock *InvokeBB : InvokeBBs)
+        InvokeBB->addSuccessor(ClauseBB);
+
+      // Mark the clause as a landing pad or MI passes will delete it.
+      ClauseBB->setIsLandingPad();
+
+      GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I));
+
+      // Start the BB with a label.
+      MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB);
+      BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II)
+          .addSym(ClauseLabel);
+
+      // Construct a simple BB that defines a register with the typeid constant.
+      FuncInfo->MBB = ClauseBB;
+      FuncInfo->InsertPt = ClauseBB->end();
+      unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB);
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+
+      // Add the typeid virtual register to the phi in the main landing pad.
+      SelectorPHI.addReg(VReg).addMBB(ClauseBB);
+    }
+
+    // Remove the edge from the invoke to the lpad.
+    for (MachineBasicBlock *InvokeBB : InvokeBBs)
+      InvokeBB->removeSuccessor(MBB);
+
+    // Restore FuncInfo back to its previous state and select the main landing
+    // pad block.
+    FuncInfo->MBB = MBB;
+    FuncInfo->InsertPt = MBB->end();
+    return;
+  }
+
   // Mark exception register as live in.
-  const TargetLowering *TLI = getTargetLowering();
-  const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy());
   if (unsigned Reg = TLI->getExceptionPointerRegister())
     FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC);
@@ -1042,7 +1116,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = nullptr;
   if (TM.Options.EnableFastISel)
-    FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo);
+    FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
 
   // Iterate over all basic blocks in the function.
   ReversePostOrderTraversal<const Function*> RPOT(&Fn);
@@ -1096,7 +1170,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         ++NumEntryBlocks;
 
         // Lower any arguments needed in this block if this is the entry block.
-        if (!FastIS->LowerArguments()) {
+        if (!FastIS->lowerArguments()) {
           // Fast isel failed to lower these arguments
           ++NumFastIselFailLowerArguments;
           if (EnableFastISelAbortArgs)
@@ -1134,7 +1208,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
         FastIS->recomputeInsertPt();
 
         // Try to select the instruction with FastISel.
-        if (FastIS->SelectInstruction(Inst)) {
+        if (FastIS->selectInstruction(Inst)) {
           --NumFastIselRemaining;
           ++NumFastIselSuccess;
           // If fast isel succeeded, skip over all the folded instructions, and
@@ -1729,7 +1803,7 @@ static SDNode *findGlueUse(SDNode *N) {
 /// This function recursively traverses up the operand chain, ignoring
 /// certain nodes.
 static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
-                          SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+                          SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
                           bool IgnoreChains) {
   // The NodeID's are given unique IDs where a node ID is guaranteed to be
   // greater than all of its (recursive) operands.  If we scan to a point where
@@ -1744,7 +1818,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
 
   // Don't revisit nodes if we already scanned it and didn't fail, we know we
   // won't fail if we scan it again.
-  if (!Visited.insert(Use))
+  if (!Visited.insert(Use).second)
     return false;
 
   for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
@@ -1861,8 +1935,8 @@ SDNode
   SDLoc dl(Op);
   MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0));
   const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
-  unsigned Reg = getTargetLowering()->getRegisterByName(
-                 RegStr->getString().data(), Op->getValueType(0));
+  unsigned Reg =
+      TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0));
   SDValue New = CurDAG->getCopyFromReg(
                         CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0));
   New->setNodeId(-1);
@@ -1874,8 +1948,8 @@ SDNode
   SDLoc dl(Op);
   MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
   const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
-  unsigned Reg = getTargetLowering()->getRegisterByName(
-                 RegStr->getString().data(), Op->getOperand(2).getValueType());
+  unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+                                        Op->getOperand(2).getValueType());
   SDValue New = CurDAG->getCopyToReg(
                         CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2));
   New->setNodeId(-1);
@@ -2375,7 +2449,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
     Result = !::CheckOpcode(Table, Index, N.getNode());
     return Index;
   case SelectionDAGISel::OPC_CheckType:
-    Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering());
+    Result = !::CheckType(Table, Index, N, SDISel.TLI);
     return Index;
   case SelectionDAGISel::OPC_CheckChild0Type:
   case SelectionDAGISel::OPC_CheckChild1Type:
@@ -2385,14 +2459,15 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
   case SelectionDAGISel::OPC_CheckChild5Type:
   case SelectionDAGISel::OPC_CheckChild6Type:
   case SelectionDAGISel::OPC_CheckChild7Type:
-    Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(),
-                        Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+    Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+                               Table[Index - 1] -
+                                   SelectionDAGISel::OPC_CheckChild0Type);
     return Index;
   case SelectionDAGISel::OPC_CheckCondCode:
     Result = !::CheckCondCode(Table, Index, N);
     return Index;
   case SelectionDAGISel::OPC_CheckValueType:
-    Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering());
+    Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
     return Index;
   case SelectionDAGISel::OPC_CheckInteger:
     Result = !::CheckInteger(Table, Index, N);
@@ -2436,6 +2511,42 @@ struct MatchScope {
   bool HasChainNodesMatched, HasGlueResultNodesMatched;
 };
 
+/// \brief A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
+/// change the DAG while matching.  The X86 addressing mode matcher is an
+/// example of this.
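+/// (For example, a complex pattern callback that mutates the DAG may CSE or
+/// delete a node that is already referenced from RecordedNodes or from a
+/// MatchScope's NodeStack; the listener below patches those references so
+/// matching can continue safely.)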
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+  SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes;
+  SmallVectorImpl<MatchScope> &MatchScopes;
+public:
+  MatchStateUpdater(SelectionDAG &DAG,
+                    SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN,
+                    SmallVectorImpl<MatchScope> &MS) :
+    SelectionDAG::DAGUpdateListener(DAG),
+    RecordedNodes(RN), MatchScopes(MS) { }
+
+  void NodeDeleted(SDNode *N, SDNode *E) {
+    // Some early-returns here to avoid the search if we deleted the node or
+    // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+    // do, so it's unnecessary to update matching state at that point).
+    // Neither of these can occur currently because we only install this
+    // update listener while matching a complex pattern.
+    if (!E || E->isMachineOpcode())
+      return;
+    // Performing a linear search here does not matter because we almost
+    // never run this code.  You'd have to have a CSE during complex pattern
+    // matching.
+    for (auto &I : RecordedNodes)
+      if (I.first.getNode() == N)
+        I.first.setNode(E);
+
+    for (auto &I : MatchScopes)
+      for (auto &J : I.NodeStack)
+        if (J.getNode() == N)
+          J.setNode(E);
+  }
+};
 }
 
 SDNode *SelectionDAGISel::
 SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
@@ -2449,8 +2560,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::BasicBlock:
   case ISD::Register:
   case ISD::RegisterMask:
-  //case ISD::VALUETYPE:
-  //case ISD::CONDCODE:
   case ISD::HANDLENODE:
   case ISD::MDNODE_SDNODE:
   case ISD::TargetConstant:
@@ -2692,6 +2801,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       unsigned CPNum = MatcherTable[MatcherIndex++];
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+      // If the target can modify the DAG during matching, keep the matching
+      // state consistent.
+      std::unique_ptr<MatchStateUpdater> MSU;
+      if (ComplexPatternFuncMutatesDAG())
+        MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+                                        MatchScopes));
+
       if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
                                RecordedNodes[RecNo].first, CPNum,
                                RecordedNodes))
@@ -2703,7 +2820,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       continue;
 
     case OPC_CheckType:
-      if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+      if (!::CheckType(MatcherTable, MatcherIndex, N, TLI))
         break;
       continue;
 
@@ -2751,7 +2868,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
 
         MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
         if (CaseVT == MVT::iPTR)
-          CaseVT = getTargetLowering()->getPointerTy();
+          CaseVT = TLI->getPointerTy();
 
         // If the VT matches, then we will execute this case.
         if (CurNodeVT == CaseVT)
           break;
@@ -2773,7 +2890,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_CheckChild2Type: case OPC_CheckChild3Type:
     case OPC_CheckChild4Type: case OPC_CheckChild5Type:
     case OPC_CheckChild6Type: case OPC_CheckChild7Type:
-      if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(),
+      if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
                             Opcode-OPC_CheckChild0Type))
         break;
       continue;
     case OPC_CheckCondCode:
       if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
       continue;
     case OPC_CheckValueType:
-      if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+      if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI))
         break;
       continue;
     case OPC_CheckInteger:
@@ -2980,7 +3097,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       for (unsigned i = 0; i != NumVTs; ++i) {
         MVT::SimpleValueType VT =
           (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
-        if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy;
+        if (VT == MVT::iPTR)
+          VT = TLI->getPointerTy().SimpleTy;
         VTs.push_back(VT);
       }
 
@@ -3076,7 +3194,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (EmitNodeInfo & OPFL_MemRefs) {
         // Only attach load or store memory operands if the generated
         // instruction may load or store.
-        const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+        const MCInstrDesc &MCID = TII->get(TargetOpc);
         bool mayLoad = MCID.mayLoad();
         bool mayStore = MCID.mayStore();
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
new file mode 100644
index 000000000000..33c20d3f2195
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -0,0 +1,684 @@
+//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes support code used by SelectionDAGBuilder when lowering a
+// statepoint sequence in SelectionDAG IR.
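+//
+// (Roughly, a statepoint sequence consists of a gc_statepoint call whose
+// token is consumed by gc_relocate calls, one per live GC pointer, and,
+// for calls that return a value, by a gc_result call that yields the
+// call's return value.)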
+//
+//===----------------------------------------------------------------------===//
+
+#include "StatepointLowering.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+#define DEBUG_TYPE "statepoint-lowering"
+
+STATISTIC(NumSlotsAllocatedForStatepoints,
+          "Number of stack slots allocated for statepoints");
+STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
+STATISTIC(StatepointMaxSlotsRequired,
+          "Maximum number of stack slots required for a single statepoint");
+
+void
+StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
+  // Consistency check
+  assert(PendingGCRelocateCalls.empty() &&
+         "Trying to visit statepoint before finished processing previous one");
+  Locations.clear();
+  RelocLocations.clear();
+  NextSlotToAllocate = 0;
+  // Need to resize this on each safepoint - we need the two to stay in
+  // sync and the clear patterns of a SelectionDAGBuilder have no relation
+  // to FunctionLoweringInfo.
+  AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
+  for (size_t i = 0; i < AllocatedStackSlots.size(); i++) {
+    AllocatedStackSlots[i] = false;
+  }
+}
+void StatepointLoweringState::clear() {
+  Locations.clear();
+  RelocLocations.clear();
+  AllocatedStackSlots.clear();
+  assert(PendingGCRelocateCalls.empty() &&
+         "cleared before statepoint sequence completed");
+}
+
+SDValue
+StatepointLoweringState::allocateStackSlot(EVT ValueType,
+                                           SelectionDAGBuilder &Builder) {
+
+  NumSlotsAllocatedForStatepoints++;
+
+  // The basic scheme here is to first look for a previously created stack slot
+  // which is not in use (accounting for the fact arbitrary slots may already
+  // be reserved), or to create a new stack slot and use it.
+
+  // If this doesn't succeed in 40000 iterations, something is seriously wrong
+  for (int i = 0; i < 40000; i++) {
+    assert(Builder.FuncInfo.StatepointStackSlots.size() ==
+               AllocatedStackSlots.size() &&
+           "broken invariant");
+    const size_t NumSlots = AllocatedStackSlots.size();
+    assert(NextSlotToAllocate <= NumSlots && "broken invariant");
+
+    if (NextSlotToAllocate >= NumSlots) {
+      assert(NextSlotToAllocate == NumSlots);
+      // record stats
+      if (NumSlots + 1 > StatepointMaxSlotsRequired) {
+        StatepointMaxSlotsRequired = NumSlots + 1;
+      }
+
+      SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
+      const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+      AllocatedStackSlots.push_back(true);
+      return SpillSlot;
+    }
+    if (!AllocatedStackSlots[NextSlotToAllocate]) {
+      const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
+      AllocatedStackSlots[NextSlotToAllocate] = true;
+      return Builder.DAG.getFrameIndex(FI, ValueType);
+    }
+    // Note: We deliberately choose to advance this only on the failing path.
+    // Doing so on the succeeding path involves a bit of complexity that caused
+    // a minor bug previously.  Unless performance shows this matters, please
+    // keep this code as simple as possible.
+    NextSlotToAllocate++;
+  }
+  llvm_unreachable("infinite loop?");
+}
+
+/// Try to find existing copies of the incoming values in stack slots used for
+/// statepoint spilling.  If we can find a spill slot for the incoming value,
+/// mark that slot as allocated, and reuse the same slot for this safepoint.
+/// This helps to avoid series of loads and stores that only serve to reshuffle
+/// values on the stack between calls.
+static void reservePreviousStackSlotForValue(SDValue Incoming,
+                                             SelectionDAGBuilder &Builder) {
+
+  if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
+    // We won't need to spill this, so no need to check for previously
+    // allocated stack slots
+    return;
+  }
+
+  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+  if (Loc.getNode()) {
+    // duplicates in input
+    return;
+  }
+
+  // Search back for the load from a stack slot pattern to find the original
+  // slot we allocated for this value.  We could extend this to deal with
+  // simple modification patterns, but simply dealing with trivial load/store
+  // sequences helps a lot already.
+  if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
+    if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
+      const int Index = FI->getIndex();
+      auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+                           Builder.FuncInfo.StatepointStackSlots.end(), Index);
+      if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
+        // not one of the lowering stack slots, can't reuse!
+        // TODO: Actually, we probably could reuse the stack slot if the value
+        // hasn't changed at all, but we'd need to look for intervening writes
+        return;
+      } else {
+        // This is one of our dedicated lowering slots
+        const int Offset =
+            std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+        if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+          // stack slot already assigned to someone else, can't use it!
+          // TODO: currently we reserve space for gc arguments after doing
+          // normal allocation for deopt arguments.  We should reserve for
+          // _all_ deopt and gc arguments, then start allocating.  This
+          // will prevent some moves being inserted when vm state changes,
+          // but gc state doesn't between two calls.
+          return;
+        }
+        // Reserve this stack slot
+        Builder.StatepointLowering.reserveStackSlot(Offset);
+      }
+
+      // Cache this slot so we find it when going through the normal
+      // assignment loop.
+      SDValue Loc =
+          Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+      Builder.StatepointLowering.setLocation(Incoming, Loc);
+    }
+  }
+
+  // TODO: handle the case where a reloaded value flows through a phi to
+  // another safepoint.  e.g.
+  // bb1:
+  //  a' = relocated...
+  // bb2: % pred: bb1, bb3, bb4, etc.
+  //  a_phi = phi(a', ...)
+  // statepoint ... a_phi
+  // NOTE: This will require reasoning about cross basic block values.  This is
+  // decidedly non trivial and this might not be the right place to do it.  We
+  // don't really have the information we need here...
+
+  // TODO: handle simple updates.  If a value is modified and the original
+  // value is no longer live, it would be nice to put the modified value in the
+  // same slot.  This allows folding of the memory accesses for some
+  // instruction types (like an increment).
+  //   statepoint (i)
+  //   i1 = i+1
+  //   statepoint (i1)
+}
+
+/// Remove any duplicates (as SDValues) from the derived pointer pairs.  This
+/// is not required for correctness.  Its purpose is to reduce the size of
+/// the StackMap section.  It has no effect on the number of spill slots
+/// required or the actual lowering.
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
+                                   SmallVectorImpl<const Value *> &Ptrs,
+                                   SmallVectorImpl<const Value *> &Relocs,
+                                   SelectionDAGBuilder &Builder) {
+
+  // This is horribly inefficient, but I don't care right now
+  SmallSet<SDValue, 64> Seen;
+
+  SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs;
+  for (size_t i = 0; i < Ptrs.size(); i++) {
+    SDValue SD = Builder.getValue(Ptrs[i]);
+    // Only add non-duplicates
+    if (Seen.count(SD) == 0) {
+      NewBases.push_back(Bases[i]);
+      NewPtrs.push_back(Ptrs[i]);
+      NewRelocs.push_back(Relocs[i]);
+    }
+    Seen.insert(SD);
+  }
+  assert(Bases.size() >= NewBases.size());
+  assert(Ptrs.size() >= NewPtrs.size());
+  assert(Relocs.size() >= NewRelocs.size());
+  Bases = NewBases;
+  Ptrs = NewPtrs;
+  Relocs = NewRelocs;
+  assert(Ptrs.size() == Bases.size());
+  assert(Ptrs.size() == Relocs.size());
+}
+
+/// Extract the call from the statepoint, lower it, and return a pointer to
+/// the call node.  Also update NodeMap so that getValue(statepoint) will
+/// reference the lowered call result.
+static SDNode *lowerCallFromStatepoint(const CallInst &CI,
+                                       SelectionDAGBuilder &Builder) {
+
+  assert(Intrinsic::experimental_gc_statepoint ==
+             dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() &&
+         "function called must be the statepoint function");
+
+  ImmutableStatepoint StatepointOperands(&CI);
+
+  // Lower the actual call itself - This is a bit of a hack, but we want to
+  // avoid modifying the actual lowering code.  This is similar in intent to
+  // the LowerCallOperands mechanism used by PATCHPOINT, but is structured
+  // differently.  Hopefully, this is slightly more robust w.r.t. calling
+  // convention, return values, and other function attributes.
+  Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee());
+
+  std::vector<Value *> Args;
+  CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin();
+  CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end();
+  Args.insert(Args.end(), arg_begin, arg_end);
+  // TODO: remove the creation of a new instruction!  We should not be
+  // modifying the IR (even temporarily) at this point.
+  CallInst *Tmp = CallInst::Create(ActualCallee, Args);
+  Tmp->setTailCall(CI.isTailCall());
+  Tmp->setCallingConv(CI.getCallingConv());
+  Tmp->setAttributes(CI.getAttributes());
+  Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+
+  // Handle the return value of the call, if any.
+  const bool HasDef = !Tmp->getType()->isVoidTy();
+  if (HasDef) {
+    // The value of the statepoint itself will be the value of the call
+    // itself.  We'll replace the actual call node shortly.  gc_result will
+    // grab this value.
+    Builder.setValue(&CI, Builder.getValue(Tmp));
+  } else {
+    // The token value is never used from here on, just generate a poison value
+    Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1));
+  }
+  // Remove the fake entry we created so we don't have a hanging reference
+  // after we delete this node.
+  Builder.removeValue(Tmp);
+  delete Tmp;
+  Tmp = nullptr;
+
+  // Search for the call node
+  // The following code is essentially reverse engineering X86's
+  // LowerCallTo.
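+  // Illustrative shape of the sequence we expect to walk, with glue links
+  // written as ~>:  DAG root ~> ... ~> CALLSEQ_END ~> call node.  We follow
+  // glue links back from the root until we reach the CALLSEQ_END; the call
+  // node itself is glued directly inside it.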
+  SDNode *CallNode = nullptr;
+
+  // We just emitted a call, so it should be the last thing generated
+  SDValue Chain = Builder.DAG.getRoot();
+
+  // Find the closest CALLSEQ_END, walking back through lowered nodes if needed
+  SDNode *CallEnd = Chain.getNode();
+  int Sanity = 0;
+  while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
+    CallEnd = CallEnd->getGluedNode();
+    assert(CallEnd && "Cannot find call node");
+    assert(Sanity < 20 && "should have found call end already");
+    Sanity++;
+  }
+  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
+         "Expected a callseq node.");
+  assert(CallEnd->getGluedNode());
+
+  // Step back inside the CALLSEQ
+  CallNode = CallEnd->getGluedNode();
+  return CallNode;
+}
+
+/// Collect all gc pointers coming into the statepoint intrinsic, clean them
+/// up, and return three arrays:
+///   Bases - base pointers incoming to this statepoint
+///   Ptrs - derived pointers incoming to this statepoint
+///   Relocs - the gc_relocate corresponding to each base/ptr pair
+/// Elements of these arrays should be in one-to-one correspondence with each
+/// other, i.e. Bases[i] and Ptrs[i] are from the same gcrelocate call
+static void
+getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases,
+                              SmallVectorImpl<const Value *> &Ptrs,
+                              SmallVectorImpl<const Value *> &Relocs,
+                              ImmutableCallSite Statepoint,
+                              SelectionDAGBuilder &Builder) {
+  // Search for relocated pointers.  Note that working backwards from the
+  // gc_relocates ensures that we only get pairs which are actually relocated
+  // and used after the statepoint.
+  // TODO: This logic should probably become a utility function in Statepoint.h
+  for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) {
+    if (!isGCRelocate(U)) {
+      continue;
+    }
+    GCRelocateOperands relocateOpers(U);
+    Relocs.push_back(cast<Value>(U));
+    Bases.push_back(relocateOpers.basePtr());
+    Ptrs.push_back(relocateOpers.derivedPtr());
+  }
+
+  // Remove any redundant llvm::Values which map to the same SDValue as another
+  // input.  Also has the effect of removing duplicates in the original
+  // llvm::Value input list as well.  This is a useful optimization for
+  // reducing the size of the StackMap section.  It has no other impact.
+  removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
+
+  assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+}
+
+/// Spill a value incoming to the statepoint.  It might be either part of the
+/// vmstate or the gcstate.  In both cases unconditionally spill it on the
+/// stack unless it is a null constant.
+/// Return a pair: the first element is the frame index containing the saved
+/// value, the second is the outgoing chain from the emitted store.
+static std::pair<SDValue, SDValue>
+spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
+                             SelectionDAGBuilder &Builder) {
+  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+
+  // Emit a new store if we didn't do it for this ptr before
+  if (!Loc.getNode()) {
+    Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
+                                                       Builder);
+    assert(isa<FrameIndexSDNode>(Loc));
+    int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
+    // We use TargetFrameIndex so that isel will not select it into LEA
+    Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
+
+    // TODO: We can create a TokenFactor node instead of
+    //       chaining stores one after another; this may allow
+    //       a bit more optimal scheduling for them
+    Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
+                                 MachinePointerInfo::getFixedStack(Index),
+                                 false, false, 0);
+
+    Builder.StatepointLowering.setLocation(Incoming, Loc);
+  }
+
+  assert(Loc.getNode());
+  return std::make_pair(Loc, Chain);
+}
+
+/// Lower a single value incoming to a statepoint node.  This value can be
+/// either a deopt value or a gc value; the handling is the same.  We special
+/// case constants and allocas, then fall back to spilling if required.
+static void lowerIncomingStatepointValue(SDValue Incoming,
+                                         SmallVectorImpl<SDValue> &Ops,
+                                         SelectionDAGBuilder &Builder) {
+  SDValue Chain = Builder.getRoot();
+
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
+    // If the original value was a constant, make sure it gets recorded as
+    // such in the stackmap.  This is required so that the consumer can
+    // parse any internal format to the deopt state.  It also handles null
+    // pointers and other constant pointers in GC states
+    Ops.push_back(
+        Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+    Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64));
+  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+    // This handles allocas as arguments to the statepoint
+    const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+    Ops.push_back(
+        Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy()));
+  } else {
+    // Otherwise, locate a spill slot and explicitly spill it so it
+    // can be found by the runtime later.  We currently do not support
+    // tracking values through callee saved registers to their eventual
+    // spill location.  This would be a useful optimization, but would
+    // need to be optional since it requires a lot of complexity on the
+    // runtime side which not all would support.
+    std::pair<SDValue, SDValue> Res =
+        spillIncomingStatepointValue(Incoming, Chain, Builder);
+    Ops.push_back(Res.first);
+    Chain = Res.second;
+  }
+
+  Builder.DAG.setRoot(Chain);
+}
+
+/// Lower deopt state and gc pointer arguments of the statepoint.  The actual
+/// lowering is described in lowerIncomingStatepointValue.  This function is
+/// responsible for lowering everything in the right position and playing some
+/// tricks to avoid redundant stack manipulation where possible.  On
+/// completion, 'Ops' will contain ready to use operands for machine code
+/// statepoint.  The chain nodes will have already been created and the DAG
+/// root will be set to the last value spilled (if any were).
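+/// (Illustrative layout of the operands appended to 'Ops' by this function:
+///    ConstantOp, <number of vm state values>, <lowered vm state values>...,
+///    base[0], derived[0], base[1], derived[1], ...)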
+static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
+                                    ImmutableStatepoint Statepoint,
+                                    SelectionDAGBuilder &Builder) {
+
+  // Lower the deopt and gc arguments for this statepoint.  Layout will
+  // be: deopt argument length, deopt arguments.., gc arguments...
+
+  SmallVector<const Value *, 64> Bases, Ptrs, Relocations;
+  getIncomingStatepointGCValues(Bases, Ptrs, Relocations,
+                                Statepoint.getCallSite(), Builder);
+
+#ifndef NDEBUG
+  // Check that each of the gc pointers and bases we've gotten out of the
+  // safepoint is something the strategy thinks might be a pointer into the GC
+  // heap.  This is basically just here to help catch errors during statepoint
+  // insertion. TODO: This should actually be in the Verifier, but we can't get
+  // to the GCStrategy from there (yet).
+  if (Builder.GFI) {
+    GCStrategy &S = Builder.GFI->getStrategy();
+    for (const Value *V : Bases) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() &&
+               "non gc managed base pointer found in statepoint");
+      }
+    }
+    for (const Value *V : Ptrs) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() &&
+               "non gc managed derived pointer found in statepoint");
+      }
+    }
+    for (const Value *V : Relocations) {
+      auto Opt = S.isGCManagedPointer(V);
+      if (Opt.hasValue()) {
+        assert(Opt.getValue() && "non gc managed pointer relocated");
+      }
+    }
+  }
+#endif
+
+  // Before we actually start lowering (and allocating spill slots for values),
+  // reserve any stack slots which we judge to be profitable to reuse for a
+  // particular value.  This is purely an optimization over the code below and
+  // doesn't change semantics at all.  It is important for performance that we
+  // reserve slots for both deopt and gc values before lowering either.
+  for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end();
+       I != E; ++I) {
+    Value *V = *I;
+    SDValue Incoming = Builder.getValue(V);
+    reservePreviousStackSlotForValue(Incoming, Builder);
+  }
+  for (unsigned i = 0; i < Bases.size() * 2; ++i) {
+    // Even elements will contain base, odd elements - derived ptr
+    const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2];
+    SDValue Incoming = Builder.getValue(V);
+    reservePreviousStackSlotForValue(Incoming, Builder);
+  }
+
+  // First, prefix the list with the number of unique values to be
+  // lowered.  Note that this is the number of *Values*, not the
+  // number of SDValues required to lower them.
+  const int NumVMSArgs = Statepoint.numTotalVMSArgs();
+  Ops.push_back(
+      Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64));
+  Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64));
+
+  assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(),
+                                         Statepoint.vm_state_end()));
+
+  // The vm state arguments are lowered in an opaque manner.  We do
+  // not know what type of values are contained within.  We skip the
+  // first one since that happens to be the total number we lowered
+  // explicitly just above.  We could have left it in the loop and
+  // not done it explicitly, but it's far easier to understand this
+  // way.
+  for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end();
+       I != E; ++I) {
+    const Value *V = *I;
+    SDValue Incoming = Builder.getValue(V);
+    lowerIncomingStatepointValue(Incoming, Ops, Builder);
+  }
+
+  // Finally, go ahead and lower all the gc arguments.  There's no prefixed
+  // length for this one.  After lowering, we'll have the base and pointer
After lowering, we'll have the base and pointer + // arrays interwoven with each (lowered) base pointer immediately followed by + // its (lowered) derived pointer, i.e. + // (base[0], ptr[0], base[1], ptr[1], ...) + for (unsigned i = 0; i < Bases.size() * 2; ++i) { + // Even elements will contain base, odd elements - derived ptr + const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + SDValue Incoming = Builder.getValue(V); + lowerIncomingStatepointValue(Incoming, Ops, Builder); + } +} + +void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { + // The basic scheme here is that information about both the original call and + // the safepoint is encoded in the CallInst. We create a temporary call and + // lower it, then reverse engineer the calling sequence. + + // Check some preconditions for sanity + assert(isStatepoint(&CI) && + "function called must be the statepoint function"); + NumOfStatepoints++; + // Clear state + StatepointLowering.startNewStatepoint(*this); + +#ifndef NDEBUG + // Consistency check + for (const User *U : CI.users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } +#endif + + ImmutableStatepoint ISP(&CI); +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + // TODO: This if should become an assert. For now, we allow the GCStrategy + // to be optional for backwards compatibility. This will only last a short + // period (i.e. a couple of weeks). + if (GFI) { + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); + } +#endif + + // Lower statepoint vmstate and gcstate arguments + SmallVector<SDValue, 10> LoweredArgs; + lowerStatepointMetaArgs(LoweredArgs, ISP, *this); + + // Get the call node; we will replace it later with the statepoint + SDNode *CallNode = lowerCallFromStatepoint(CI, *this); + + // Construct the actual STATEPOINT node with all the appropriate arguments + // and return values. + + // TODO: Currently, all of these operands are being marked as read/write in + // PrologEpilogInserter.cpp; we should special case the VMState arguments + // and flags to be read-only. + SmallVector<SDValue, 40> Ops; + + // Calculate and push starting position of vmstate arguments + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Glue; + if (CallNode->getGluedNode()) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + // Get number of arguments incoming directly into call node + unsigned NumCallRegArgs = + CallNode->getNumOperands() - (Glue.getNode() ?
4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add call target + SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); + Ops.push_back(CallTarget); + + // Add call arguments + // Get position of register mask in the call + SDNode::op_iterator RegMaskIt; + if (Glue.getNode()) + RegMaskIt = CallNode->op_end() - 2; + else + RegMaskIt = CallNode->op_end() - 1; + Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); + + // Add a leading constant argument with the Flags and the calling convention + // masked together + CallingConv::ID CallConv = CI.getCallingConv(); + int Flags = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); + assert(Flags == 0 && "not expected to be used"); + Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64)); + + // Insert all vmstate and gcstate arguments + Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end()); + + // Add register mask from call node + Ops.push_back(*RegMaskIt); + + // Add chain + Ops.push_back(CallNode->getOperand(0)); + + // Same for the glue, but we add it only if the original call had it + if (Glue.getNode()) + Ops.push_back(Glue); + + // Compute return values + SmallVector<EVT, 21> ValueVTs; + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one + // as input. This allows someone else to chain + // off us as needed. + SDVTList NodeTys = DAG.getVTList(ValueVTs); + + SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Replace the original call + DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root + // Remove the original call node + DAG.DeleteNode(CallNode); + + // DON'T set the root - under the assumption that it's already set past the + // inserted node we created. + + // TODO: A better future implementation would be to emit a single variable + // argument, variable return value STATEPOINT node here and then hook up the + // return value of each gc.relocate to the respective output of the + // previously emitted STATEPOINT value. Unfortunately, this doesn't appear + // to actually be possible today. +} + +void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { + // The result value of the gc_result is simply the result of the actual + // call. We've already emitted this, so just grab the value. + Instruction *I = cast<Instruction>(CI.getArgOperand(0)); + assert(isStatepoint(I) && + "first argument must be a statepoint token"); + + setValue(&CI, getValue(I)); +} + +void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { +#ifndef NDEBUG + // Consistency check + StatepointLowering.relocCallVisited(CI); +#endif + + GCRelocateOperands relocateOpers(&CI); + SDValue SD = getValue(relocateOpers.derivedPtr()); + + if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) { + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail.
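// Illustrative note: a null gc pointer reaches this path as a ConstantSDNode
// and an alloca as a FrameIndexSDNode; lowerIncomingStatepointValue passed
// both through unspilled, so the relocate can simply reuse the same SDValue.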
+ setValue(&CI, SD); + return; + } + + SDValue Loc = StatepointLowering.getRelocLocation(SD); + // Emit a new load if we did not emit it before + if (!Loc.getNode()) { + SDValue SpillSlot = StatepointLowering.getLocation(SD); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this; + // it may allow more scheduling opportunities + SDValue Chain = getRoot(); + + Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, + SpillSlot, MachinePointerInfo::getFixedStack(FI), false, + false, false, 0); + + StatepointLowering.setRelocLocation(SD, Loc); + + // Again, be conservative, don't emit pending loads + DAG.setRoot(Loc.getValue(1)); + } + + assert(Loc.getNode()); + setValue(&CI, Loc); +} diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h new file mode 100644 index 000000000000..673112cf8bb5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -0,0 +1,138 @@ +//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes support code used by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include <vector> + +namespace llvm { +class SelectionDAGBuilder; + +/// This class tracks both per-statepoint and per-selectiondag information. +/// For each statepoint it tracks the locations of its gc values (incoming and +/// relocated) and the list of gc.relocate calls scheduled for visiting (this +/// is used for a debug mode consistency check only). The spill slot tracking +/// works in concert with information in FunctionLoweringInfo. +class StatepointLoweringState { +public: + StatepointLoweringState() : NextSlotToAllocate(0) { + } + + /// Reset all state tracking for a newly encountered safepoint. Also + /// performs some consistency checking. + void startNewStatepoint(SelectionDAGBuilder &Builder); + + /// Clear the memory usage of this object. This is called from + /// SelectionDAGBuilder::clear. We require this is never called in the + /// midst of processing a statepoint sequence. + void clear(); + + /// Returns the spill location of a value incoming to the current + /// statepoint. Will return SDValue() if this value hasn't been + /// spilled. Otherwise, the value has already been spilled and no + /// further action is required by the caller. + SDValue getLocation(SDValue val) { + if (!Locations.count(val)) + return SDValue(); + return Locations[val]; + } + void setLocation(SDValue val, SDValue Location) { + assert(!Locations.count(val) && + "Trying to allocate already allocated location"); + Locations[val] = Location; + } + + /// Returns the relocated value for a given input pointer. Will + /// return SDValue() if this value hasn't yet been reloaded from + /// its stack slot after the statepoint.
Otherwise, the value + /// has already been reloaded and the SDValue of that reload will + /// be returned. Note that VMState values are spilled but not + /// reloaded (since they don't change at the safepoint unless + /// also listed in the GC pointer section) and will thus never + /// be in this map. + SDValue getRelocLocation(SDValue val) { + if (!RelocLocations.count(val)) + return SDValue(); + return RelocLocations[val]; + } + void setRelocLocation(SDValue val, SDValue Location) { + assert(!RelocLocations.count(val) && + "Trying to allocate already allocated location"); + RelocLocations[val] = Location; + } + + /// Record the fact that we expect to encounter a given gc_relocate + /// before the next statepoint. If we don't see it, we'll report + /// an assertion. + void scheduleRelocCall(const CallInst &RelocCall) { + PendingGCRelocateCalls.push_back(&RelocCall); + } + /// Remove this gc_relocate from the list we're expecting to see + /// before the next statepoint. If we weren't expecting to see + /// it, we'll report an assertion. + void relocCallVisited(const CallInst &RelocCall) { + SmallVectorImpl<const CallInst *>::iterator itr = + std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(), + &RelocCall); + assert(itr != PendingGCRelocateCalls.end() && + "Visited unexpected gcrelocate call"); + PendingGCRelocateCalls.erase(itr); + } + + // TODO: Should add consistency tracking to ensure we encounter + // expected gc_result calls too. + + /// Get a stack slot we can use to store a value of type ValueType. This + /// will hopefully be a recycled slot from another statepoint. + SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder); + + void reserveStackSlot(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + assert(!AllocatedStackSlots[Offset] && "already reserved!"); + assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!"); + AllocatedStackSlots[Offset] = true; + } + bool isStackSlotAllocated(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + return AllocatedStackSlots[Offset]; + } + +private: + /// Maps a pre-relocation value (a gc pointer directly incoming into the + /// statepoint) to its location (currently only stack slots) + DenseMap<SDValue, SDValue> Locations; + /// Maps a pre-relocated value to its new relocated location + DenseMap<SDValue, SDValue> RelocLocations; + + /// A boolean indicator for each slot listed in the FunctionInfo as to + /// whether it has been used in the current statepoint. Since we try to + /// preserve stack slots across safepoints, there can be gaps in which + /// slots have been allocated.
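// Illustrative scenario (an assumption, not from the patch): if an earlier
// statepoint used slots 0-2 and reserveStackSlot(1) pins slot 1 for a value
// this statepoint shares with it, allocateStackSlot can recycle 0 and 2 but
// must skip 1 - hence a per-slot bitmap rather than a simple high-water mark.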
+ SmallVector<bool, 50> AllocatedStackSlots; + + /// Points just beyond the last slot known to have been allocated + unsigned NextSlotToAllocate; + + /// Keep track of pending gcrelocate calls for consistency check + SmallVector<const CallInst *, 10> PendingGCRelocateCalls; +}; +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 05ace413bfdf..72e0aca84080 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -31,13 +31,13 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; -/// NOTE: The constructor takes ownership of TLOF. -TargetLowering::TargetLowering(const TargetMachine &tm, - const TargetLoweringObjectFile *tlof) - : TargetLoweringBase(tm, tlof) {} +/// NOTE: The TargetMachine owns TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm) + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -1283,36 +1283,53 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // (zext x) == C --> x == (trunc C) - if (DCI.isBeforeLegalize() && N0->hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // (sext x) == C --> x == (trunc C) + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + DCI.isBeforeLegalize() && N0->hasOneUse()) { unsigned MinBits = N0.getValueSizeInBits(); - SDValue PreZExt; + SDValue PreExt; + bool Signed = false; if (N0->getOpcode() == ISD::ZERO_EXTEND) { // ZExt MinBits = N0->getOperand(0).getValueSizeInBits(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } + } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { + // SExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + Signed = true; } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { - // ZEXTLOAD + // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); - PreZExt = N0; + PreExt = N0; + } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { + Signed = true; + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; } } + // Figure out how many bits we need to preserve this constant. + unsigned ReqdBits = Signed ? + C1.getBitWidth() - C1.getNumSignBits() + 1 : + C1.getActiveBits(); + // Make sure we're not losing bits from the constant. if (MinBits > 0 && - MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) { + MinBits < C1.getBitWidth() && + MinBits >= ReqdBits) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. 
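// Worked example (illustrative): for (sext i8 %x) == i32 -1, C1 is all ones,
// so C1.getNumSignBits() == 32 and ReqdBits == 32 - 32 + 1 == 1. MinBits is
// 8, which is >= ReqdBits and < 32, so the compare is rewritten below as
// (%x == i8 -1) on the narrower type.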
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } @@ -2177,7 +2194,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *RI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2239,14 +2257,11 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // Do a prepass over the constraints, canonicalizing them, and building up the // ConstraintOperands list. - InlineAsm::ConstraintInfoVector - ConstraintInfos = IA->ParseConstraints(); - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + ConstraintOperands.emplace_back(std::move(CI)); AsmOperandInfo &OpInfo = ConstraintOperands.back(); // Update multiple alternative constraint count. @@ -2325,7 +2340,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintInfos.size()) { + if (ConstraintOperands.size()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2641,11 +2656,13 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold sdiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2673,38 +2690,36 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // If d > 0 and m < 0, add the numerator if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. 
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getScalarSizeInBits() - 1, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(T.getNode()); + Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } /// \brief Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold udiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2725,8 +2740,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. magics = Divisor.lshr(Shift).magicu(Shift); @@ -2744,8 +2758,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equivalent - if (Created) - Created->push_back(Q.getNode()); + + Created->push_back(Q.getNode()); if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && @@ -2754,15 +2768,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } @@ -2785,7 +2796,7 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { + SDValue RL, SDValue RH) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2818,8 +2829,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The inputs are both zero-extended. if (HasUMUL_LOHI) { // We can emit a umul_lohi.
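// A standalone sketch of the multiply-by-magic-number lowering that
// BuildSDIV/BuildUDIV above emit, specialized to unsigned divide-by-7 in 32
// bits. The constant follows Hacker's Delight; this illustrates the technique
// only and is not LLVM's code path.
#include <cstdint>

static uint32_t udiv7(uint32_t N) {
  const uint32_t Magic = 0x24924925u;                 // low 32 bits of ceil(2^35 / 7)
  uint32_t Q = uint32_t((uint64_t(N) * Magic) >> 32); // MULHU N, Magic
  uint32_t NPQ = ((N - Q) >> 1) + Q;                  // the magics.a != 0 fixup above
  return NPQ >> 2;                                    // final shift by magics.s - 1
}
// e.g. udiv7(100) == 14 and udiv7(0xFFFFFFFFu) == 0xFFFFFFFFu / 7.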
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } @@ -2834,8 +2845,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The input values are both sign-extended. if (HasSMUL_LOHI) { // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index f7c64dac6124..0be00f0e7ce5 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -144,7 +144,7 @@ namespace { LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), - Type::getInt32Ty(C), NULL); + Type::getInt32Ty(C), nullptr); Constant *PersFn = F.getParent()-> getOrInsertFunction("__gcc_personality_v0", diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index b0950ded270a..7fd8107707c8 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -98,7 +99,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) { VoidPtrTy, // __personality VoidPtrTy, // __lsda ArrayType::get(VoidPtrTy, 5), // __jbuf - NULL); + nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); @@ -138,8 +139,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { /// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until /// we reach blocks we've already seen. static void MarkBlocksLiveIn(BasicBlock *BB, - SmallPtrSet<BasicBlock *, 64> &LiveBBs) { - if (!LiveBBs.insert(BB)) + SmallPtrSetImpl<BasicBlock *> &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) @@ -190,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", @@ -350,10 +351,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Demote the PHIs to the stack. - for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(), - E = PHIsToDemote.end(); - I != E; ++I) - DemotePHIToStack(*I); + for (PHINode *PN : PHIsToDemote) + DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block.
LPI->moveBefore(UnwindBlock->begin()); diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 24e94d11f888..97a5424aa560 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -60,27 +61,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -namespace { -static BlockFrequency Threshold; -} - -/// Decision threshold. A node gets the output value 0 if the weighted sum of -/// its inputs falls in the open interval (-Threshold;Threshold). -static BlockFrequency getThreshold() { return Threshold; } - -/// \brief Set the threshold for a given entry frequency. -/// -/// Set the threshold relative to \c Entry. Since the threshold is used as a -/// bound on the open interval (-Threshold;Threshold), 1 is the minimum -/// threshold. -static void setThreshold(const BlockFrequency &Entry) { - // Apparently 2 is a good threshold when Entry==2^14, but we need to scale - // it. Divide by 2^13, rounding as appropriate. - uint64_t Freq = Entry.getFrequency(); - uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); - Threshold = std::max(UINT64_C(1), Scaled); -} - /// Node - Each edge bundle corresponds to a Hopfield node. /// /// The node contains precomputed frequency data that only depends on the CFG, @@ -126,9 +106,9 @@ struct SpillPlacement::Node { /// clear - Reset per-query data, but preserve frequencies that only depend on // the CFG. - void clear() { + void clear(const BlockFrequency &Threshold) { BiasN = BiasP = Value = 0; - SumLinkWeights = getThreshold(); + SumLinkWeights = Threshold; Links.clear(); } @@ -166,7 +146,7 @@ struct SpillPlacement::Node { /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. - bool update(const Node nodes[]) { + bool update(const Node nodes[], const BlockFrequency &Threshold) { // Compute the weighted sum of inputs. BlockFrequency SumN = BiasN; BlockFrequency SumP = BiasP; @@ -186,9 +166,9 @@ struct SpillPlacement::Node { // 2. It helps tame rounding errors when the links nominally sum to 0. // bool Before = preferReg(); - if (SumN >= SumP + getThreshold()) + if (SumN >= SumP + Threshold) Value = -1; - else if (SumP >= SumN + getThreshold()) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -227,7 +207,7 @@ void SpillPlacement::activate(unsigned n) { if (ActiveNodes->test(n)) return; ActiveNodes->set(n); - nodes[n].clear(); + nodes[n].clear(Threshold); // Very large bundles usually come from big switches, indirect branches, // landing pads, or loops with many 'continue' statements. It is difficult to @@ -244,6 +224,18 @@ void SpillPlacement::activate(unsigned n) { } } +/// \brief Set the threshold for a given entry frequency. +/// +/// Set the threshold relative to \c Entry. Since the threshold is used as a +/// bound on the open interval (-Threshold;Threshold), 1 is the minimum +/// threshold. +void SpillPlacement::setThreshold(const BlockFrequency &Entry) { + // Apparently 2 is a good threshold when Entry==2^14, but we need to scale + // it. Divide by 2^13, rounding as appropriate. + uint64_t Freq = Entry.getFrequency(); + uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); + Threshold = std::max(UINT64_C(1), Scaled); +} /// addConstraints - Compute node biases and weights from a set of constraints. 
/// Set a bit in NodeMask for each active node. @@ -310,7 +302,7 @@ bool SpillPlacement::scanActiveBundles() { Linked.clear(); RecentPositive.clear(); for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { - nodes[n].update(nodes); + nodes[n].update(nodes, Threshold); // A node that must spill, or a node without any links is not going to // change its value ever again, so exclude it from iterations. if (nodes[n].mustSpill()) @@ -330,7 +322,7 @@ void SpillPlacement::iterate() { // First update the recently positive nodes. They have likely received new // negative bias that will turn them off. while (!RecentPositive.empty()) - nodes[RecentPositive.pop_back_val()].update(nodes); + nodes[RecentPositive.pop_back_val()].update(nodes, Threshold); if (Linked.empty()) return; @@ -349,7 +341,7 @@ void SpillPlacement::iterate() { iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - if (nodes[n].update(nodes)) { + if (nodes[n].update(nodes, Threshold)) { Changed = true; if (nodes[n].preferReg()) RecentPositive.push_back(n); @@ -363,7 +355,7 @@ void SpillPlacement::iterate() { for (SmallVectorImpl<unsigned>::const_iterator I = std::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - if (nodes[n].update(nodes)) { + if (nodes[n].update(nodes, Threshold)) { Changed = true; if (nodes[n].preferReg()) RecentPositive.push_back(n); diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 43fc7f50cef9..622361e7e80c 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -24,8 +24,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H -#define LLVM_CODEGEN_SPILLPLACEMENT_H +#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H +#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -40,7 +40,7 @@ class MachineBasicBlock; class MachineLoopInfo; class MachineBlockFrequencyInfo; -class SpillPlacement : public MachineFunctionPass { +class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF; const EdgeBundles *bundles; @@ -60,7 +60,11 @@ class SpillPlacement : public MachineFunctionPass { SmallVector<unsigned, 8> RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector<BlockFrequency, 4> BlockFrequencies; + SmallVector<BlockFrequency, 8> BlockFrequencies; + + /// Decision threshold. A node gets the output value 0 if the weighted sum of + /// its inputs falls in the open interval (-Threshold;Threshold). + BlockFrequency Threshold; public: static char ID; // Pass identification, replacement for typeid. @@ -152,6 +156,7 @@ private: void releaseMemory() override; void activate(unsigned); + void setThreshold(const BlockFrequency &Entry); }; } // end namespace llvm diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp deleted file mode 100644 index 064944894162..000000000000 --- a/lib/CodeGen/Spiller.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "Spiller.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -#define DEBUG_TYPE "spiller" - -namespace { - enum SpillerName { trivial, inline_ }; -} - -static cl::opt<SpillerName> -spillerOpt("spiller", - cl::desc("Spiller to use: (default: standard)"), - cl::Prefix, - cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumValN(inline_, "inline", "inline spiller"), - clEnumValEnd), - cl::init(trivial)); - -// Spiller virtual destructor implementation. -Spiller::~Spiller() {} - -namespace { - -/// Utility class for spillers. -class SpillerBase : public Spiller { -protected: - MachineFunctionPass *pass; - MachineFunction *mf; - VirtRegMap *vrm; - LiveIntervals *lis; - MachineFrameInfo *mfi; - MachineRegisterInfo *mri; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - /// Construct a spiller base. - SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : pass(&pass), mf(&mf), vrm(&vrm) - { - lis = &pass.getAnalysis<LiveIntervals>(); - mfi = mf.getFrameInfo(); - mri = &mf.getRegInfo(); - tii = mf.getTarget().getInstrInfo(); - tri = mf.getTarget().getRegisterInfo(); - } - - /// Add spill ranges for every use/def of the live interval, inserting loads - /// immediately before each use, and stores after each def. No folding or - /// remat is attempted. - void trivialSpillEverywhere(LiveRangeEdit& LRE) { - LiveInterval* li = &LRE.getParent(); - - DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); - - assert(li->weight != llvm::huge_valf && - "Attempting to spill already spilled value."); - - assert(!TargetRegisterInfo::isStackSlot(li->reg) && - "Trying to spill a stack slot."); - - DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); - - const TargetRegisterClass *trc = mri->getRegClass(li->reg); - unsigned ss = vrm->assignVirt2StackSlot(li->reg); - - // Iterate over reg uses/defs. - for (MachineRegisterInfo::reg_instr_iterator - regItr = mri->reg_instr_begin(li->reg); - regItr != mri->reg_instr_end();) { - - // Grab the use/def instr. - MachineInstr *mi = &*regItr; - - DEBUG(dbgs() << " Processing " << *mi); - - // Step regItr to the next use/def instr. - ++regItr; - - // Collect uses & defs for this instr. - SmallVector<unsigned, 2> indices; - bool hasUse = false; - bool hasDef = false; - for (unsigned i = 0; i != mi->getNumOperands(); ++i) { - MachineOperand &op = mi->getOperand(i); - if (!op.isReg() || op.getReg() != li->reg) - continue; - hasUse |= mi->getOperand(i).isUse(); - hasDef |= mi->getOperand(i).isDef(); - indices.push_back(i); - } - - // Create a new virtual register for the load and/or store. - unsigned NewVReg = LRE.create(); - - // Update the reg operands & kill flags. 
- for (unsigned i = 0; i < indices.size(); ++i) { - unsigned mopIdx = indices[i]; - MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(NewVReg); - if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { - mop.setIsKill(true); - } - } - assert(hasUse || hasDef); - - // Insert reload if necessary. - MachineBasicBlock::iterator miItr(mi); - if (hasUse) { - MachineInstrSpan MIS(miItr); - - tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc, - tri); - lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr); - } - - // Insert store if necessary. - if (hasDef) { - MachineInstrSpan MIS(miItr); - - tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg, - true, ss, trc, tri); - lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end()); - } - } - } -}; - -} // end anonymous namespace - -namespace { - -/// Spills any live range using the spill-everywhere method with no attempt at -/// folding. -class TrivialSpiller : public SpillerBase { -public: - - TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : SpillerBase(pass, mf, vrm) {} - - void spill(LiveRangeEdit &LRE) override { - // Ignore spillIs - we don't use it. - trivialSpillEverywhere(LRE); - } -}; - -} // end anonymous namespace - -void Spiller::anchor() { } - -llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm) { - switch (spillerOpt) { - case trivial: return new TrivialSpiller(pass, mf, vrm); - case inline_: return createInlineSpiller(pass, mf, vrm); - } - llvm_unreachable("Invalid spiller optimization"); -} diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index b7d5beaab1b2..08f99ec78adc 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPILLER_H -#define LLVM_CODEGEN_SPILLER_H +#ifndef LLVM_LIB_CODEGEN_SPILLER_H +#define LLVM_LIB_CODEGEN_SPILLER_H namespace llvm { @@ -31,11 +31,6 @@ namespace llvm { }; - /// Create and return a spiller object, as specified on the command line. - Spiller* createSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm); - /// Create and return a spiller that will insert spill code directly instead /// of deferring though VirtRegMap. Spiller *createInlineSpiller(MachineFunctionPass &pass, diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 7d4f568df27d..4c8801a3611a 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -40,16 +40,11 @@ STATISTIC(NumRepairs, "Number of invalid live ranges repaired"); // Split Analysis //===----------------------------------------------------------------------===// -SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, - const LiveIntervals &lis, +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli) - : MF(vrm.getMachineFunction()), - VRM(vrm), - LIS(lis), - Loops(mli), - TII(*MF.getTarget().getInstrInfo()), - CurLI(nullptr), - LastSplitPoint(MF.getNumBlockIDs()) {} + : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli), + TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr), + LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); @@ -125,10 +120,9 @@ void SplitAnalysis::analyzeUses() { // First get all the defs from the interval values. This provides the correct // slots for early clobbers. 
- for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(), - E = CurLI->vni_end(); I != E; ++I) - if (!(*I)->isPHIDef() && !(*I)->isUnused()) - UseSlots.push_back((*I)->def); + for (const VNInfo *VNI : CurLI->valnos) + if (!VNI->isPHIDef() && !VNI->isUnused()) + UseSlots.push_back(VNI->def); // Get use slots from the use-def chain. const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -321,22 +315,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &sa, - LiveIntervals &lis, - VirtRegMap &vrm, +SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, MachineDominatorTree &mdt, MachineBlockFrequencyInfo &mbfi) - : SA(sa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), - TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), - MBFI(mbfi), - Edit(nullptr), - OpenIdx(0), - SpillMode(SM_Partition), - RegAssign(Allocator) -{} + : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), + MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), + RegAssign(Allocator) {} void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit = &LRE; @@ -740,9 +726,7 @@ void SplitEditor::hoistCopiesForSize() { // Find the nearest common dominator for parent values with multiple // back-copies. If a single back-copy dominates, put it in DomPair.second. - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo *VNI = *VI; + for (VNInfo *VNI : LI->valnos) { if (VNI->isUnused()) continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); @@ -815,9 +799,7 @@ void SplitEditor::hoistCopiesForSize() { // Remove redundant back-copies that are now known to be dominated by another // def with the same value. SmallVector<VNInfo*, 8> BackCopies; - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo *VNI = *VI; + for (VNInfo *VNI : LI->valnos) { if (VNI->isUnused()) continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); @@ -836,16 +818,15 @@ void SplitEditor::hoistCopiesForSize() { bool SplitEditor::transferValues() { bool Skipped = false; RegAssignMap::const_iterator AssignI = RegAssign.begin(); - for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), - ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { - DEBUG(dbgs() << " blit " << *ParentI << ':'); - VNInfo *ParentVNI = ParentI->valno; + for (const LiveRange::Segment &S : Edit->getParent()) { + DEBUG(dbgs() << " blit " << S << ':'); + VNInfo *ParentVNI = S.valno; // RegAssign has holes where RegIdx 0 should be used.
- SlotIndex Start = ParentI->start; + SlotIndex Start = S.start; AssignI.advanceTo(Start); do { unsigned RegIdx; - SlotIndex End = ParentI->end; + SlotIndex End = S.end; if (!AssignI.valid()) { RegIdx = 0; } else if (AssignI.start() <= Start) { @@ -930,7 +911,7 @@ bool SplitEditor::transferValues() { ++MBB; } Start = End; - } while (Start != ParentI->end); + } while (Start != S.end); DEBUG(dbgs() << '\n'); } @@ -943,9 +924,7 @@ bool SplitEditor::transferValues() { void SplitEditor::extendPHIKillRanges() { // Extend live ranges to be live-out for successor PHI values. - for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), - E = Edit->getParent().vni_end(); I != E; ++I) { - const VNInfo *PHIVNI = *I; + for (const VNInfo *PHIVNI : Edit->getParent().valnos) { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); @@ -1019,12 +998,11 @@ void SplitEditor::deleteRematVictims() { SmallVector<MachineInstr*, 8> Dead; for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ LiveInterval *LI = &LIS.getInterval(*I); - for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); - LII != LIE; ++LII) { + for (const LiveRange::Segment &S : LI->segments) { // Dead defs end at the dead slot. - if (LII->end != LII->valno->def.getDeadSlot()) + if (S.end != S.valno->def.getDeadSlot()) continue; - MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); assert(MI && "Missing instruction for dead def"); MI->addRegisterDead(LI->reg, &TRI); @@ -1049,9 +1027,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { // the inserted copies. // Add the original defs from the parent interval. - for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), - E = Edit->getParent().vni_end(); I != E; ++I) { - const VNInfo *ParentVNI = *I; + for (const VNInfo *ParentVNI : Edit->getParent().valnos) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 7048ee37631b..2e60c1445b81 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPLITKIT_H -#define LLVM_CODEGEN_SPLITKIT_H +#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H +#define LLVM_LIB_CODEGEN_SPLITKIT_H #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 370430c4c82a..faf94b67fe77 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -228,7 +228,7 @@ void StackColoring::dump() const { unsigned StackColoring::collectMarkers(unsigned NumSlot) { unsigned MarkersFound = 0; // Scan the function to find all lifetime markers. - // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. 
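// Illustrative note: a depth-first walk from the entry block visits blocks in
// an order fixed entirely by each block's successor lists, so the lifetime
// marker numbering computed below is stable from run to run, independent of
// pointer values or allocation order.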
for (MachineBasicBlock *MBB : depth_first(MF)) { @@ -463,7 +463,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { if (!VI.Var) continue; if (SlotRemap.count(VI.Slot)) { - DEBUG(dbgs()<<"Remapping debug info for ["<<VI.Var->getName()<<"].\n"); + DEBUG(dbgs() << "Remapping debug info for [" + << DIVariable(VI.Var).getName() << "].\n"); VI.Slot = SlotRemap[VI.Slot]; FixedDbg++; } diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 3ba502fff695..767f43a420fd 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/StackMapLivenessAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" - +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -67,7 +67,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << _MF.getName() << " **********\n"); MF = &_MF; - TRI = MF->getTarget().getRegisterInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. @@ -123,5 +123,7 @@ uint32_t *StackMapLiveness::createRegisterMask() const { for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); RI != RE; ++RI) Mask[*RI / 32] |= 1U << (*RI % 32); + + TRI->adjustStackMapLiveOutMask(Mask); return Mask; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1473fc184f84..f1d1160c9528 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <iterator> using namespace llvm; @@ -83,7 +84,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + unsigned Size = + AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); @@ -122,7 +124,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = - AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg()); + AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( + MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); Locs.push_back( Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); @@ -158,7 +161,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. @@ -217,9 +220,18 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, I != E; ++I) { // Constants are encoded as sign-extended integers. 
// -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. - if (I->LocType == Location::Constant && - ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) { + if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) { I->LocType = Location::ConstantIndex; + // ConstPool is intentionally a MapVector of 'uint64_t's (as + // opposed to 'int64_t's). We should never be in a situation + // where we have to insert either the tombstone or the empty + // keys into a map, and for a DenseMap<uint64_t, T> these are + // (uint64_t)0 and (uint64_t)-1. They can be and are + // represented using 32 bit integers. + + assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() && + (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() && + "empty and tombstone keys should fit in 32 bits!"); auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); I->Offset = Result.first - ConstPool.begin(); } @@ -229,15 +241,19 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // entry. const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( MCSymbolRefExpr::Create(MILabel, OutContext), - MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), + MCSymbolRefExpr::Create(AP.CurrentFnSymForSize, OutContext), OutContext); - CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts)); + CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), + std::move(LiveOuts)); // Record the stack size of the current function. const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(*(AP.MF)); FnStackSize[AP.CurrentFnSym] = - MFI->hasVarSizedObjects() ? UINT64_MAX : MFI->getStackSize(); + DynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { @@ -270,6 +286,18 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { } #endif } +void StackMaps::recordStatepoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STATEPOINT && + "expected statepoint"); + + StatepointOpers opers(&MI); + // Record all the deopt and gc operands (they're contiguous and run from the + // initial index to the end of the operand list) + const unsigned StartIdx = opers.getVarIdx(); + recordStackMapOpers(MI, 0xABCDEF00, + MI.operands_begin() + StartIdx, MI.operands_end(), + false); +} /// Emit the stackmap header. /// @@ -485,7 +513,7 @@ void StackMaps::serializeToStackMapSection() { MCContext &OutContext = AP.OutStreamer.getContext(); MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); // Create the section. 
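// Illustrative example for the ConstantIndex rewrite earlier in
// recordStackMapOpers: a Constant location holding 0x100000000 fails
// isInt<32>(), so its full 64-bit value is appended to ConstPool and the
// location keeps only the pool index, while a small constant such as -1 stays
// inline as a sign-extended 32-bit immediate.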
const MCSection *StackMapSection = diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index accfe7be18d9..a132805ac845 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/StackProtector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" @@ -31,8 +32,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cstdlib> using namespace llvm; @@ -85,7 +88,7 @@ bool StackProtector::runOnFunction(Function &Fn) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - TLI = TM->getTargetLowering(); + TLI = TM->getSubtargetImpl()->getTargetLowering(); Attribute Attr = Fn.getAttributes().getAttribute( AttributeSet::FunctionIndex, "stack-protector-buffer-size"); @@ -168,7 +171,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { // Keep track of what PHI nodes we have already visited to ensure // they are only visited once. - if (VisitedPHIs.insert(PN)) + if (VisitedPHIs.insert(PN).second) if (HasAddressTaken(PN)) return true; } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { @@ -209,20 +212,16 @@ bool StackProtector::RequiresStackProtector() { Attribute::StackProtect)) return false; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - BasicBlock *BB = I; - - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; - ++II) { - if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { if (AI->isArrayAllocation()) { // SSP-Strong: Enable protectors for any call to alloca, regardless // of size. if (Strong) return true; - if (const ConstantInt *CI = - dyn_cast<ConstantInt>(AI->getArraySize())) { + if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. @@ -334,7 +333,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. 
static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, - const TargetLoweringBase *TLI, const Triple &Trip, + const TargetLoweringBase *TLI, const Triple &TT, AllocaInst *&AI, Value *&StackGuardVar) { bool SupportsSelectionDAGSP = false; PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); @@ -343,9 +342,10 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - StackGuardVar = ConstantExpr::getIntToPtr( - OffsetVal, PointerType::get(PtrTy, AddressSpace)); - } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = + ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, + AddressSpace)); + } else if (TT.isOSOpenBSD()) { StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); cast<GlobalValue>(StackGuardVar) ->setVisibility(GlobalValue::HiddenVisibility); @@ -398,14 +398,13 @@ bool StackProtector::InsertStackProtectors() { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. - assert(InsertionPt != nullptr && "BB must have a terminator instruction at " - "this point."); + assert(InsertionPt != nullptr && + "BB must have a terminator instruction at this point."); } Function *Intrinsic = Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); - } else { // If we do not support SelectionDAG based tail calls, generate IR level // tail calls. @@ -458,11 +457,17 @@ bool StackProtector::InsertStackProtectors() { LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); - B.CreateCondBr(Cmp, NewBB, FailBB); + unsigned SuccessWeight = + BranchProbabilityInfo::getBranchWeightStackProtector(true); + unsigned FailureWeight = + BranchProbabilityInfo::getBranchWeightStackProtector(false); + MDNode *Weights = MDBuilder(F->getContext()) + .createBranchWeights(SuccessWeight, FailureWeight); + B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } - // Return if we didn't modify any basic blocks. I.e., there are no return + // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. 
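// For reference (an illustration, not part of the patch; %ok is a
// hypothetical block name): the weights added above mark the guard compare's
// success edge as overwhelmingly likely, conceptually
//   br i1 %cmp, label %ok, label %CallStackCheckFailBlk, !prof !N
//   !N = !{!"branch_weights", i32 SuccessWeight, i32 FailureWeight}
// so block placement keeps the failure path out of the hot code.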
if (!HasPrologue) return false; @@ -476,15 +481,17 @@ BasicBlock *StackProtector::CreateFailBB() { LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); IRBuilder<> B(FailBB); - if (Trip.getOS() == llvm::Triple::OpenBSD) { - Constant *StackChkFail = M->getOrInsertFunction( - "__stack_smash_handler", Type::getVoidTy(Context), - Type::getInt8PtrTy(Context), NULL); + if (Trip.isOSOpenBSD()) { + Constant *StackChkFail = + M->getOrInsertFunction("__stack_smash_handler", + Type::getVoidTy(Context), + Type::getInt8PtrTy(Context), nullptr); B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); } else { - Constant *StackChkFail = M->getOrInsertFunction( - "__stack_chk_fail", Type::getVoidTy(Context), NULL); + Constant *StackChkFail = + M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context), + nullptr); B.CreateCall(StackChkFail); } B.CreateUnreachable(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 791168f5c05f..cc72e5e51f2c 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -28,7 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <vector> using namespace llvm; @@ -422,7 +422,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { }); MFI = MF.getFrameInfo(); - TII = MF.getTarget().getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis<LiveStacks>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp new file mode 100644 index 000000000000..802cf132b34d --- /dev/null +++ b/lib/CodeGen/StatepointExampleGC.cpp @@ -0,0 +1,54 @@ +//===-- StatepointExampleGC.cpp - An example statepoint GC strategy ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a GCStrategy which serves as an example of the usage +// of a statepoint based lowering strategy. This GCStrategy is intended to be +// suitable as a default implementation usable with any collector which can +// consume the standard stackmap format generated by statepoints, uses the +// default address space to distinguish between gc managed and non-gc managed +// pointers, and has reasonable relocation semantics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +namespace { +class StatepointGC : public GCStrategy { +public: + StatepointGC() { + UseStatepoints = true; + // These options are all gc.root specific; we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + CustomSafePoints = false; + } + Optional<bool> isGCManagedPointer(const Value *V) const override { + // Method is only valid on pointer typed values. + PointerType *PT = cast<PointerType>(V->getType()); + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap.
We know that a pointer into this heap needs to be + // updated and that no other pointer does. + return (1 == PT->getAddressSpace()); + } +}; +} + +static GCRegistry::Add<StatepointGC> +X("statepoint-example", "an example strategy for statepoint"); + +namespace llvm { +void linkStatepointExampleGC() {} +} diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 723a6297f36a..4377236323a7 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -135,8 +136,8 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -798,11 +799,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), E = TailBB->livein_end(); I != E; ++I) { - if (!RegsLiveAtExit[*I]) + if (!RS->isRegUsed(*I, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 883e9d1846d9..1557d10238e9 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -14,8 +14,8 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cstdlib> using namespace llvm; @@ -26,7 +26,7 @@ TargetFrameLowering::~TargetFrameLowering() { /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { + int FI) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getObjectOffset(FI) + MFI->getStackSize() - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); @@ -34,7 +34,7 @@ int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever // getFrameRegister() says. 
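The StatepointExampleGC file above doubles as a template for clients. A minimal sketch of registering one's own strategy the same way; MyGC and the "my-gc" name are hypothetical:

#include "llvm/CodeGen/GCStrategy.h"
using namespace llvm;

namespace {
// Hypothetical strategy following the StatepointGC pattern above.
class MyGC : public GCStrategy {
public:
  MyGC() {
    UseStatepoints = true; // take the statepoint lowering path
    InitRoots = false;     // keep gc.root lowering out of the way
  }
};
}

// Static registration: the strategy becomes selectable by name, as in
// defining a function with the attribute gc "my-gc".
static GCRegistry::Add<MyGC> Y("my-gc", "a hypothetical example strategy");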
The target can override this if it's doing diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 83966bd0c208..608b8068759e 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -290,13 +291,15 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, Offset = 0; return true; } - unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx); + unsigned BitSize = + TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). if (BitSize % 8) return false; - int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = + TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -305,7 +308,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getDataLayout()->isLittleEndian()) { + if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -370,6 +373,10 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, return nullptr; } +void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + llvm_unreachable("Not a MachO target"); +} + bool TargetInstrInfo:: canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const { @@ -498,7 +505,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const MachineOperand &MO = MI->getOperand(1-Ops[0]); MachineBasicBlock::iterator Pos = MI; - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags == MachineMemOperand::MOStore) storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); @@ -562,8 +569,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, AliasAnalysis *AA) const { const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetMachine &TM = MF.getTarget(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); // Remat clients assume operand 0 is the defined register. if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) @@ -582,7 +587,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, // redundant with subsequent checks, but it's target-independent, // simple, and a common case. 
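One note on the getNoopForMachoTarget() default added in the TargetInstrInfo.cpp hunk above: the base implementation now traps, so Mach-O targets are expected to override it. A hypothetical override, where MyTargetInstrInfo and MyTarget::NOP stand in for a real backend's names:

// Sketch of a target override; a real backend supplies its NOP encoding.
void MyTargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
  NopInst.setOpcode(MyTarget::NOP); // illustrative opcode name
}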
int FrameIdx = 0; - if (TII.isLoadFromStackSlot(MI, FrameIdx) && + if (isLoadFromStackSlot(MI, FrameIdx) && MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; @@ -640,6 +645,28 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return true; } +int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + int FrameSetupOpcode = getCallFrameSetupOpcode(); + int FrameDestroyOpcode = getCallFrameDestroyOpcode(); + + if (MI->getOpcode() != FrameSetupOpcode && + MI->getOpcode() != FrameDestroyOpcode) + return 0; + + int SPAdj = MI->getOperand(0).getImm(); + + if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) + SPAdj = -SPAdj; + + return SPAdj; +} + /// isSchedulingBoundary - Test if the given instruction should be /// considered a scheduling boundary. This primarily includes labels /// and terminators. @@ -655,8 +682,8 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // saves compile time, because it doesn't require every single // stack slot reference to depend on the instruction that does the // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) return true; @@ -746,14 +773,14 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on its opcode. -unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr *DefMI) const { if (DefMI->isTransient()) return 0; if (DefMI->mayLoad()) - return SchedModel->LoadLatency; + return SchedModel.LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return SchedModel->HighLatency; + return SchedModel.HighLatency; return 1; } @@ -852,3 +879,77 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } + +bool TargetInstrInfo::getRegSequenceInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + assert((MI.isRegSequence() || + MI.isRegSequenceLike()) && "Instruction does not have the proper type"); + + if (!MI.isRegSequence()) + return getRegSequenceLikeInputs(MI, DefIdx, InputRegs); + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + assert(DefIdx == 0 && "REG_SEQUENCE only has one def"); + for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; + OpIdx += 2) { + const MachineOperand &MOReg = MI.getOperand(OpIdx); + const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); + assert(MOSubIdx.isImm() && + "One of the subindices of the reg_sequence is not an immediate"); + // Record Reg:SubReg, SubIdx.
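Stepping back to the new TargetInstrInfo::getSPAdjust() hook above: it returns the signed stack-pointer delta for call-frame setup/destroy pseudos and 0 for every other instruction, so a caller can simply accumulate it. A usage sketch under that assumption:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Net SP adjustment across one block; ordinary instructions contribute 0.
static int blockSPAdjust(const MachineBasicBlock &MBB,
                         const TargetInstrInfo &TII) {
  int SPAdj = 0;
  for (const MachineInstr &MI : MBB)
    SPAdj += TII.getSPAdjust(&MI);
  return SPAdj;
}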
+ InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(), + (unsigned)MOSubIdx.getImm())); + } + return true; +} + +bool TargetInstrInfo::getExtractSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert((MI.isExtractSubreg() || + MI.isExtractSubregLike()) && "Instruction does not have the proper type"); + + if (!MI.isExtractSubreg()) + return getExtractSubregLikeInputs(MI, DefIdx, InputReg); + + // We are looking at: + // Def = EXTRACT_SUBREG v0.sub1, sub0. + assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); + const MachineOperand &MOReg = MI.getOperand(1); + const MachineOperand &MOSubIdx = MI.getOperand(2); + assert(MOSubIdx.isImm() && + "The subindex of the extract_subreg is not an immediate"); + + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} + +bool TargetInstrInfo::getInsertSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const { + assert((MI.isInsertSubreg() || + MI.isInsertSubregLike()) && "Instruction does not have the proper type"); + + if (!MI.isInsertSubreg()) + return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg); + + // We are looking at: + // Def = INSERT_SUBREG v0, v1, sub0. + assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOSubIdx = MI.getOperand(3); + assert(MOSubIdx.isImm() && + "The subindex of the insert_subreg is not an immediate"); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e80ef7176c21..9b2fdffa87c1 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; @@ -205,6 +206,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FLOOR_F80] = "floorl"; Names[RTLIB::FLOOR_F128] = "floorl"; Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FMIN_F32] = "fminf"; + Names[RTLIB::FMIN_F64] = "fmin"; + Names[RTLIB::FMIN_F80] = "fminl"; + Names[RTLIB::FMIN_F128] = "fminl"; + Names[RTLIB::FMIN_PPCF128] = "fminl"; + Names[RTLIB::FMAX_F32] = "fmaxf"; + Names[RTLIB::FMAX_F64] = "fmax"; + Names[RTLIB::FMAX_F80] = "fmaxl"; + Names[RTLIB::FMAX_F128] = "fmaxl"; + Names[RTLIB::FMAX_PPCF128] = "fmaxl"; Names[RTLIB::ROUND_F32] = "roundf"; Names[RTLIB::ROUND_F64] = "round"; Names[RTLIB::ROUND_F80] = "roundl"; @@ -403,7 +414,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SINCOS_PPCF128] = nullptr; } - if (TT.getOS() != Triple::OpenBSD) { + if (!TT.isOSOpenBSD()) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. @@ -683,10 +694,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::O_F128] = ISD::SETEQ; } -/// NOTE: The constructor takes ownership of TLOF.
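Before the TargetLoweringBase.cpp changes continue, a usage sketch for the getRegSequenceInputs() family added above; the helper below is hypothetical and assumes MI satisfies isRegSequence() || isRegSequenceLike():

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Print each (Reg:SubReg -> SubIdx) input feeding a REG_SEQUENCE-like def.
static void dumpRegSequenceInputs(const MachineInstr &MI,
                                  const TargetInstrInfo &TII,
                                  const TargetRegisterInfo *TRI) {
  SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> Inputs;
  if (!TII.getRegSequenceInputs(MI, /*DefIdx=*/0, Inputs))
    return;
  for (const TargetInstrInfo::RegSubRegPairAndIdx &In : Inputs)
    dbgs() << PrintReg(In.Reg, TRI, In.SubReg) << " -> subidx " << In.SubIdx
           << '\n';
}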
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, - const TargetLoweringObjectFile *tlof) - : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) { +/// NOTE: The TargetMachine owns TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) + : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) { initActions(); // Perform these initializations only once. @@ -700,10 +710,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; IntDivIsCheap = false; - Pow2DivIsCheap = false; + FsqrtIsCheap = false; + Pow2SDivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; + EnableExtLdPromotion = false; + HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -718,7 +731,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; - SupportJumpTables = true; MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); @@ -726,10 +738,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, InitLibcallCallingConvs(LibcallCallingConvs); } -TargetLoweringBase::~TargetLoweringBase() { - delete &TLOF; -} - void TargetLoweringBase::initActions() { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); @@ -741,35 +749,32 @@ void TargetLoweringBase::initActions() { memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); // Set default actions for various operations. - for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + for (MVT VT : MVT::all_valuetypes()) { // Default all indexed load / store to expand. for (unsigned IM = (unsigned)ISD::PRE_INC; IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { - setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); - setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedLoadAction(IM, VT, Expand); + setIndexedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. - setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, - (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); // These operations default to expand. - setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FGETSIGN, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); // These library functions default to expand. - setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); // These operations default to expand for vector types. 
- if (VT >= MVT::FIRST_VECTOR_VALUETYPE && - VT <= MVT::LAST_VECTOR_VALUETYPE) { - setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, - (MVT::SimpleValueType)VT, Expand); + if (VT.isVector()) { + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } } @@ -792,6 +797,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f16, Expand); setOperationAction(ISD::FEXP2, MVT::f16, Expand); setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FMINNUM, MVT::f16, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); setOperationAction(ISD::FCEIL, MVT::f16, Expand); setOperationAction(ISD::FRINT, MVT::f16, Expand); @@ -803,6 +810,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f32, Expand); setOperationAction(ISD::FEXP2, MVT::f32, Expand); setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FMINNUM, MVT::f32, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); @@ -814,6 +823,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f64, Expand); setOperationAction(ISD::FEXP2, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FMINNUM, MVT::f64, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); @@ -825,6 +836,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f128, Expand); setOperationAction(ISD::FEXP2, MVT::f128, Expand); setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FMINNUM, MVT::f128, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); setOperationAction(ISD::FCEIL, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); @@ -976,11 +989,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Add a new memory operand for this FI. const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), - MFI.getObjectAlignment(FI)); + + unsigned Flags = MachineMemOperand::MOLoad; + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOVolatile; + } + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(FI), Flags, + TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), + MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. 
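The initActions() hunks above default FMINNUM/FMAXNUM to Expand for every type, which pairs with the fminf/fmin/fminl libcall names registered earlier. A target with native min/max instructions opts back in from its own TargetLowering constructor; a hypothetical fragment:

// Inside a hypothetical target's TargetLowering constructor: claim native
// support for f32/f64 and let every other type keep the Expand default.
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);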
@@ -996,7 +1014,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// of the register class for the specified type and its associated "cost". std::pair<const TargetRegisterClass*, uint8_t> TargetLoweringBase::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; if (!RC) return std::make_pair(RC, 0); @@ -1023,8 +1042,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const { /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. void TargetLoweringBase::computeRegisterProperties() { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); // Everything defaults to needing one register. for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { @@ -1179,8 +1198,12 @@ void TargetLoweringBase::computeRegisterProperties() { TransformToType[i] = MVT::Other; if (PreferredAction == TypeScalarizeVector) ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); - else + else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); + else + // Set type action according to the number of elements. + ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector + : TypeSplitVector); } else { TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f59efa35031c..9f1e06b4725d 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; using namespace dwarf; @@ -72,9 +73,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment( + TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -287,7 +289,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: this is getting the alignment of the character, not the // alignment of the global! 
unsigned Align = - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast<GlobalVariable>(GV)); const char *SizeSpec = ".rodata.str1."; if (Kind.isMergeable2ByteCString()) @@ -355,44 +358,59 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, return DataRelROSection; } -const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym) const { - // The default scheme is .ctor / .dtor, so we have to invert the priority - // numbering. - if (Priority == 65535) - return StaticCtorSection; +static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, + bool UseInitArray, + bool IsCtor, + unsigned Priority, + const MCSymbol *KeySym) { + std::string Name; + unsigned Type; + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; + SectionKind Kind = SectionKind::getDataRel(); + StringRef COMDAT = KeySym ? KeySym->getName() : ""; + + if (KeySym) + Flags |= ELF::SHF_GROUP; if (UseInitArray) { - std::string Name = std::string(".init_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (IsCtor) { + Type = ELF::SHT_INIT_ARRAY; + Name = ".init_array"; + } else { + Type = ELF::SHT_FINI_ARRAY; + Name = ".fini_array"; + } + if (Priority != 65535) { + Name += '.'; + Name += utostr(Priority); + } } else { - std::string Name = std::string(".ctors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (IsCtor) + Name = ".ctors"; + else + Name = ".dtors"; + if (Priority != 65535) { + Name += '.'; + Name += utostr(65535 - Priority); + } + Type = ELF::SHT_PROGBITS; } + + return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT); } -const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( +const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - // The default scheme is .ctor / .dtor, so we have to invert the priority - // numbering. 
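The getStaticStructorSection() helper above folds four near-duplicate code paths into one. Its name computation, distilled into a standalone sketch (the real helper also selects the section type, flags, and COMDAT group):

#include <string>

// Section name for a static ctor/dtor of a given priority; 65535 is the
// default-priority sentinel and keeps the bare name.
static std::string structorSectionName(bool UseInitArray, bool IsCtor,
                                       unsigned Priority) {
  std::string Name = UseInitArray ? (IsCtor ? ".init_array" : ".fini_array")
                                  : (IsCtor ? ".ctors" : ".dtors");
  if (Priority != 65535)
    // .ctors/.dtors run in reverse order, so that scheme inverts priorities.
    Name += '.' + std::to_string(UseInitArray ? Priority : 65535 - Priority);
  return Name;
}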
- if (Priority == 65535) - return StaticDtorSection; + return getStaticStructorSection(getContext(), UseInitArray, true, Priority, + KeySym); +} - if (UseInitArray) { - std::string Name = std::string(".fini_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); - } else { - std::string Name = std::string(".dtors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); - } +const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getStaticStructorSection(getContext(), UseInitArray, false, Priority, + KeySym); } void @@ -446,14 +464,15 @@ emitModuleFlags(MCStreamer &Streamer, continue; StringRef Key = MFE.Key->getString(); - Value *Val = MFE.Val; + Metadata *Val = MFE.Val; if (Key == "Objective-C Image Info Version") { - VersionVal = cast<ConstantInt>(Val)->getZExtValue(); + VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue(); } else if (Key == "Objective-C Garbage Collection" || Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated") { - ImageInfoFlags |= cast<ConstantInt>(Val)->getZExtValue(); + Key == "Objective-C Is Simulated" || + Key == "Objective-C Image Swift Version") { + ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue(); } else if (Key == "Objective-C Image Info Section") { SectionVal = cast<MDString>(Val)->getString(); } else if (Key == "Linker Options") { @@ -554,41 +573,6 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( return S; } -bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( - const MCSection &Section) const { - const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); - - // Sections holding 1 byte strings are atomized based on the data - // they contain. - // Sections holding 2 byte strings require symbols in order to be - // atomized. - // There is no dedicated section for 4 byte strings. - if (SMO.getKind().isMergeable1ByteCString()) - return false; - - if (SMO.getSegmentName() == "__DATA" && - SMO.getSectionName() == "__cfstring") - return false; - - switch (SMO.getType()) { - default: - return true; - - // These sections are atomized at the element boundaries without using - // symbols. - case MachO::S_4BYTE_LITERALS: - case MachO::S_8BYTE_LITERALS: - case MachO::S_16BYTE_LITERALS: - case MachO::S_LITERAL_POINTERS: - case MachO::S_NON_LAZY_SYMBOL_POINTERS: - case MachO::S_LAZY_SYMBOL_POINTERS: - case MachO::S_MOD_INIT_FUNC_POINTERS: - case MachO::S_MOD_TERM_FUNC_POINTERS: - case MachO::S_INTERPOSING: - return false; - } -} - const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -611,17 +595,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. 
if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return UStringSection; - if (Kind.isMergeableConst()) { + // With MachO only variables whose corresponding symbol starts with 'l' or + // 'L' can be merged, so we only try merging GVs with private linkage. + if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) { if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) @@ -739,7 +727,7 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_MEM_EXECUTE | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_CNT_CODE; - else if (K.isBSS ()) + else if (K.isBSS()) Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | @@ -749,7 +737,7 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; - else if (K.isReadOnly()) + else if (K.isReadOnly() || K.isReadOnlyWithRel()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ; @@ -774,7 +762,7 @@ static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { if (ComdatGV->getComdat() != C) report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + - "' is not a key for it's COMDAT."); + "' is not a key for its COMDAT."); return ComdatGV; } @@ -843,9 +831,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { return ".bss"; if (Kind.isThreadLocal()) return ".tls$"; - if (Kind.isWriteable()) - return ".data"; - return ".rdata"; + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) + return ".rdata"; + return ".data"; } @@ -893,7 +881,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isThreadLocal()) return TLSDataSection; - if (Kind.isReadOnly()) + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) return ReadOnlySection; // Note: we claim that common symbols are put in BSSSection, but they are @@ -924,7 +912,7 @@ emitModuleFlags(MCStreamer &Streamer, i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { const Module::ModuleFlagEntry &MFE = *i; StringRef Key = MFE.Key->getString(); - Value *Val = MFE.Val; + Metadata *Val = MFE.Val; if (Key == "Linker Options") { LinkerOptions = cast<MDNode>(Val); break; @@ -944,7 +932,7 @@ emitModuleFlags(MCStreamer &Streamer, StringRef Op = MDOption->getString(); // Lead with a space for consistency with our dllexport implementation. std::string Escaped(" "); - if (Op.find(" ") != StringRef::npos) { + if (!Op.startswith("\"") && (Op.find(" ") != StringRef::npos)) { // The PE-COFF spec says args with spaces must be quoted. It doesn't say // how to escape quotes, but it probably uses this algorithm: // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx @@ -960,29 +948,14 @@ emitModuleFlags(MCStreamer &Streamer, } } -static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, - const MCSection *Sec, - const MCSymbol *KeySym) { - // Return the normal section if we don't have to be associative. - if (!KeySym) - return Sec; - - // Make an associative section with the same name and kind as the normal - // section. 
- const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec); - unsigned Characteristics = - SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT; - return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics, - SecCOFF->getKind(), KeySym->getName(), - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); -} - const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym); + return getContext().getAssociativeCOFFSection( + cast<MCSectionCOFF>(StaticCtorSection), KeySym); } const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym); + return getContext().getAssociativeCOFFSection( + cast<MCSectionCOFF>(StaticDtorSection), KeySym); } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 3ca2017550cf..618d903a0904 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -51,3 +51,10 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } + +/// getCFIFuncName - If this returns a non-empty string, then it is the name of +/// the function that gets called on CFI violations in CFI non-enforcing mode +/// (!TargetOptions::CFIEnforcing). +StringRef TargetOptions::getCFIFuncName() const { + return CFIFuncName; +} diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index a3a4fb3f02b9..61a66b623928 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -108,7 +109,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. const TargetRegisterClass * -TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); // Pick the most sub register class of the right type that contains @@ -293,3 +294,11 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // All clear, tell the register allocator to prefer this register. 
Hints.push_back(Phys); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void +TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, + const TargetRegisterInfo *TRI) { + dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n"; +} +#endif diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index b0f2ca68884b..ef2dab1287fa 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -16,7 +16,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -157,7 +156,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *UseMI, unsigned UseOperIdx) const { if (!hasInstrSchedModel() && !hasInstrItineraries()) - return TII->defaultDefLatency(&SchedModel, DefMI); + return TII->defaultDefLatency(SchedModel, DefMI); if (hasInstrItineraries()) { int OperLatency = 0; @@ -181,7 +180,7 @@ unsigned TargetSchedModel::computeOperandLatency( // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(&SchedModel, DefMI)); + TII->defaultDefLatency(SchedModel, DefMI)); return InstrLatency; } // hasInstrSchedModel() @@ -222,7 +221,29 @@ unsigned TargetSchedModel::computeOperandLatency( // FIXME: Automatically giving all implicit defs defaultDefLatency is // undesirable. We should only do it for defs that are known to the MC // desc like flags. Truly implicit defs should get 1 cycle latency. - return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI); +} + +unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { + assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); + + unsigned SCIdx = TII->get(Opcode).getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); + unsigned Latency = 0; + + if (SCDesc->isValid() && !SCDesc->isVariant()) { + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; + } + + assert(Latency && "No MI sched latency"); + return 0; } unsigned @@ -248,7 +269,7 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, return Latency; } } - return TII->defaultDefLatency(&SchedModel, MI); + return TII->defaultDefLatency(SchedModel, MI); } unsigned TargetSchedModel:: @@ -268,7 +289,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // for predicated defs. 
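Backing up briefly to the TargetSchedule.cpp hunk above: the new opcode-only computeInstrLatency() overload asserts hasInstrSchedModel(), so callers have to guard the query. A usage sketch; the 1-cycle fallback is an assumption, not part of the patch:

#include "llvm/CodeGen/TargetSchedule.h"
using namespace llvm;

// Latency by opcode when an MI scheduling model exists; otherwise fall
// back to a conservative default.
static unsigned latencyOf(const TargetSchedModel &SM, unsigned Opcode) {
  return SM.hasInstrSchedModel() ? SM.computeInstrLatency(Opcode) : 1;
}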
unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) return computeInstrLatency(DefMI); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f42d47bebda7..e218a8387c53 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "twoaddrinstr" @@ -544,10 +545,21 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, if (ToRegA) { unsigned FromRegB = getMappedReg(regB, SrcRegMap); unsigned FromRegC = getMappedReg(regC, SrcRegMap); - bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); - bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); - if (BComp != CComp) - return !BComp && CComp; + bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI); + bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI); + + // Compute if any of the following are true: + // -RegB is not tied to a register and RegC is compatible with RegA. + // -RegB is tied to the wrong physical register, but RegC is. + // -RegB is tied to the wrong physical register, and RegC isn't tied. + if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC))) + return true; + // Don't compute if any of the following are true: + // -RegC is not tied to a register and RegB is compatible with RegA. + // -RegC is tied to the wrong physical register, but RegB is. + // -RegC is tied to the wrong physical register, and RegB isn't tied. + if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB))) + return false; } // If there is a use of regC between its last def (could be livein) and this @@ -666,7 +678,7 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) { unsigned Reg = DstReg; while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, NewReg, IsDstPhys)) { - if (IsCopy && !Processed.insert(UseMI)) + if (IsCopy && !Processed.insert(UseMI).second) break; DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); @@ -1503,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); MRI = &MF->getRegInfo(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); - InstrItins = TM.getInstrItineraryData(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); + InstrItins = TM.getSubtargetImpl()->getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 2e220820b921..7824f9218575 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -64,9 +64,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { SmallPtrSet<BasicBlock*, 8> Reachable; // Mark all reachable blocks. 
- for (df_ext_iterator<Function*, SmallPtrSet<BasicBlock*, 8> > I = - df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) - /* Mark all reachable blocks */; + for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; // Loop over all dead blocks, remembering them and deleting all instructions // in them. @@ -125,10 +124,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); // Mark all reachable blocks. - for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> > - I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); - I != E; ++I) - /* Mark all reachable blocks */; + for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; // Loop over all dead blocks, remembering them and deleting all instructions // in them. diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 704736f07945..7d3b0cea1c15 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -54,8 +55,8 @@ INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); - TII = mf.getTarget().getInstrInfo(); - TRI = mf.getTarget().getRegisterInfo(); + TII = mf.getSubtarget().getInstrInfo(); + TRI = mf.getSubtarget().getRegisterInfo(); MF = &mf; Virt2PhysMap.clear(); @@ -123,7 +124,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { OS << '[' << PrintReg(Reg, TRI) << " -> " << PrintReg(Virt2PhysMap[Reg], TRI) << "] " - << MRI->getRegClass(Reg)->getName() << "\n"; + << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } @@ -131,7 +132,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] - << "] " << MRI->getRegClass(Reg)->getName() << "\n"; + << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } OS << '\n'; @@ -205,8 +206,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { MF = &fn; TM = &MF->getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); MRI = &MF->getRegInfo(); Indexes = &getAnalysis<SlotIndexes>(); LIS = &getAnalysis<LiveIntervals>(); @@ -251,20 +252,41 @@ void VirtRegRewriter::addMBBLiveIns() { unsigned PhysReg = VRM->getPhys(VirtReg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); - // Scan the segments of LI. 
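A note on the UnreachableBlockElim cleanup above: the loop body is intentionally empty, since depth_first_ext() exists only to populate the external visited set. The idiom in isolation:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Populate 'Reachable' with every block reachable from the entry; the
// traversal itself is the entire effect.
static void markReachable(Function &F,
                          SmallPtrSet<BasicBlock *, 8> &Reachable) {
  for (BasicBlock *BB : depth_first_ext(&F, Reachable))
    (void)BB; // nothing to do per block
}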
- for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I != E; - ++I) { - if (!Indexes->findLiveInMBBs(I->start, I->end, LiveIn)) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - if (!LiveIn[i]->isLiveIn(PhysReg)) - LiveIn[i]->addLiveIn(PhysReg); - LiveIn.clear(); + if (LI.hasSubRanges()) { + for (LiveInterval::SubRange &S : LI.subranges()) { + for (const auto &Seg : S.segments) { + if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) + continue; + for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) { + unsigned SubReg = SR.getSubReg(); + unsigned SubRegIndex = SR.getSubRegIndex(); + unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex); + if ((SubRegLaneMask & S.LaneMask) == 0) + continue; + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { + if (!LiveIn[i]->isLiveIn(SubReg)) + LiveIn[i]->addLiveIn(SubReg); + } + } + LiveIn.clear(); + } + } + } else { + // Scan the segments of LI. + for (const auto &Seg : LI.segments) { + if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) + continue; + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) + if (!LiveIn[i]->isLiveIn(PhysReg)) + LiveIn[i]->addLiveIn(PhysReg); + LiveIn.clear(); + } } } } void VirtRegRewriter::rewrite() { + bool NoSubRegLiveness = !MRI->tracksSubRegLiveness(); SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; @@ -346,7 +368,8 @@ void VirtRegRewriter::rewrite() { // A virtual register kill refers to the whole register, so we may // have to add <imp-use,kill> operands for the super-register. A // partial redef always kills and redefines the super-register. - if (MO.readsReg() && (MO.isDef() || MO.isKill())) + if (NoSubRegLiveness && MO.readsReg() + && (MO.isDef() || MO.isKill())) SuperKills.push_back(PhysReg); if (MO.isDef()) { @@ -357,10 +380,12 @@ void VirtRegRewriter::rewrite() { MO.setIsUndef(false); // Also add implicit defs for the super-register. - if (MO.isDead()) - SuperDeads.push_back(PhysReg); - else - SuperDefs.push_back(PhysReg); + if (NoSubRegLiveness) { + if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); + } } // PhysReg operands cannot have subregister indexes. |
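Finally, on the VirtRegRewriter::addMBBLiveIns() change that closes this section: with sub-register liveness tracking, a physical sub-register becomes a block live-in only when its lane mask overlaps the live subrange's mask. The filter in isolation, as a fragment; PhysReg, TRI, S, and MBB are assumed from the surrounding loop:

// For each sub-register of PhysReg, add it as a live-in only if its lanes
// intersect the subrange S that is live into MBB.
for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) {
  unsigned LaneMask = TRI->getSubRegIndexLaneMask(SR.getSubRegIndex());
  if ((LaneMask & S.LaneMask) == 0)
    continue; // this sub-register carries no live lanes of S
  if (!MBB->isLiveIn(SR.getSubReg()))
    MBB->addLiveIn(SR.getSubReg());
}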