author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-16 19:46:52 +0000 |
commit | 6b3f41ed88e8e440e11a4fbf20b6600529f80049 | |
tree | 928b056f24a634d628c80238dbbf10d41b1a71d5 /lib/CodeGen | |
parent | c46e6a5940c50058e00c0c5f9123fd82e338d29a | |
Diffstat (limited to 'lib/CodeGen')
47 files changed, 1862 insertions, 1002 deletions
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 87b45c001de4..98163bffb60b 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -767,7 +767,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) - FuncName = GlobalValue::getRealLinkageName(GV->getName()); + FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); @@ -888,13 +888,21 @@ void CodeViewDebug::collectVariableInfoFromMFTable( if (!Scope) continue; + // If the variable has an attached offset expression, extract it. + // FIXME: Try to handle DW_OP_deref as well. + int64_t ExprOffset = 0; + if (VI.Expr) + if (!VI.Expr->extractIfOffset(ExprOffset)) + continue; + // Get the frame register used and the offset. unsigned FrameReg = 0; int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); // Calculate the label ranges. - LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset); + LocalVarDefRange DefRange = + createDefRangeMem(CVReg, FrameOffset + ExprOffset); for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); @@ -2194,7 +2202,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (GV->hasComdat()) { MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + - Twine(GlobalValue::getRealLinkageName(GV->getName()))); + Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); switchToDebugSectionForSymbol(GVSym); EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 1d63e33a4d33..826162ad47c4 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -129,10 +129,9 @@ bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { } void DebugHandlerBase::beginFunction(const MachineFunction *MF) { - assert(Asm); PrevInstBB = nullptr; - if (!hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MMI, MF)) { skippedNonDebugFunction(); return; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 738e062cb93f..e172712cf889 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -440,7 +440,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. 
- DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -634,7 +634,7 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { - DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; if (AbsDef) return; @@ -696,7 +696,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -708,6 +708,42 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { } } +void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) { + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var.getVariable(), Var.getInlinedAt())); + auto *VariableDie = Var.getDIE(); + if (AbsVar && AbsVar->getDIE()) { + addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, + *AbsVar->getDIE()); + } else + applyVariableAttributes(Var, *VariableDie); +} + +DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) { + const DILocalVariable *Cleansed; + return getExistingAbstractVariable(IV, Cleansed); +} + +// Find abstract variable, if any, associated with Var. +DbgVariable *DwarfCompileUnit::getExistingAbstractVariable( + InlinedVariable IV, const DILocalVariable *&Cleansed) { + // More then one inlined variable corresponds to one abstract variable. + Cleansed = IV.first; + auto &AbstractVariables = getAbstractVariables(); + auto I = AbstractVariables.find(Cleansed); + if (I != AbstractVariables.end()) + return I->second.get(); + return nullptr; +} + +void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var, + LexicalScope *Scope) { + assert(Scope && Scope->isAbstractScope()); + auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr); + DU->addScopeVariable(Scope, AbsDbgVariable.get()); + getAbstractVariables()[Var] = std::move(AbsDbgVariable); +} + void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. if (!Skeleton) { diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 20a415150b4d..77e9e671529f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -68,6 +68,9 @@ class DwarfCompileUnit final : public DwarfUnit { // ranges/locs. const MCSymbol *BaseAddress; + DenseMap<const MDNode *, DIE *> AbstractSPDies; + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + /// \brief Construct a DIE for the given DbgVariable without initializing the /// DbgVariable's DIE reference. 
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); @@ -76,6 +79,18 @@ class DwarfCompileUnit final : public DwarfUnit { bool includeMinimalInlineScopes() const; + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractSPDies; + return DU->getAbstractSPDies(); + } + + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractVariables; + return DU->getAbstractVariables(); + } + public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); @@ -189,6 +204,13 @@ public: DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); + void finishVariableDefinition(const DbgVariable &Var); + /// Find abstract variable associated with Var. + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); + void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 6f442f5c3172..3410b98d7776 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -71,6 +71,10 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section", cl::desc("Generate dwarf aranges"), cl::init(false)); +static cl::opt<bool> SplitDwarfCrossCuReferences( + "split-dwarf-cross-cu-references", cl::Hidden, + cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); + namespace { enum DefaultOnOff { Default, Enable, Disable }; } @@ -362,21 +366,29 @@ template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) { F(*SkelCU); } -void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { +bool DwarfDebug::shareAcrossDWOCUs() const { + return SplitDwarfCrossCuReferences; +} + +void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, + LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); auto *SP = cast<DISubprogram>(Scope->getScopeNode()); - ProcessedSPNodes.insert(SP); - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. auto &CU = *CUMap.lookup(SP->getUnit()); - forBothCUs(CU, [&](DwarfCompileUnit &CU) { + if (auto *SkelCU = CU.getSkeleton()) { + (shareAcrossDWOCUs() ? CU : SrcCU) + .constructAbstractSubprogramScopeDIE(Scope); + if (CU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructAbstractSubprogramScopeDIE(Scope); + } else { CU.constructAbstractSubprogramScopeDIE(Scope); - }); + } } void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { @@ -564,13 +576,7 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. 
DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable( - InlinedVariable(Var->getVariable(), Var->getInlinedAt())); - if (AbsVar && AbsVar->getDIE()) { - Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, - *AbsVar->getDIE()); - } else - Unit->applyVariableAttributes(*Var, *VariableDie); + Unit->finishVariableDefinition(*Var); } } @@ -718,58 +724,32 @@ void DwarfDebug::endModule() { } // clean up. - AbstractVariables.clear(); + // FIXME: AbstractVariables.clear(); } -// Find abstract variable, if any, associated with Var. -DbgVariable * -DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, - const DILocalVariable *&Cleansed) { - // More then one inlined variable corresponds to one abstract variable. - Cleansed = IV.first; - auto I = AbstractVariables.find(Cleansed); - if (I != AbstractVariables.end()) - return I->second.get(); - return nullptr; -} - -DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { - const DILocalVariable *Cleansed; - return getExistingAbstractVariable(IV, Cleansed); -} - -void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, - LexicalScope *Scope) { - assert(Scope && Scope->isAbstractScope()); - auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr); - InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); - AbstractVariables[Var] = std::move(AbsDbgVariable); -} - -void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, +void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *ScopeNode) { const DILocalVariable *Cleansed = nullptr; - if (getExistingAbstractVariable(IV, Cleansed)) + if (CU.getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( cast<DILocalScope>(ScopeNode))); } -void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *ScopeNode) { const DILocalVariable *Cleansed = nullptr; - if (getExistingAbstractVariable(IV, Cleansed)) + if (CU.getExistingAbstractVariable(IV, Cleansed)) return; if (LexicalScope *Scope = LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) - createAbstractVariable(Cleansed, Scope); + CU.createAbstractVariable(Cleansed, Scope); } - // Collect variable information from side table maintained by MF. 
void DwarfDebug::collectVariableInfoFromMFTable( - DenseSet<InlinedVariable> &Processed) { + DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) { for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; @@ -784,7 +764,7 @@ void DwarfDebug::collectVariableInfoFromMFTable( if (!Scope) continue; - ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode()); auto RegVar = make_unique<DbgVariable>(Var.first, Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (InfoHolder.addScopeVariable(Scope, RegVar.get())) @@ -955,9 +935,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, } } -DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope, +DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, + LexicalScope &Scope, InlinedVariable IV) { - ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode()); ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second)); InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); return ConcreteVariables.back().get(); @@ -980,7 +961,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMFTable(Processed); + collectVariableInfoFromMFTable(TheCU, Processed); for (const auto &I : DbgValues) { InlinedVariable IV = I.first; @@ -1002,7 +983,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, continue; Processed.insert(IV); - DbgVariable *RegVar = createConcreteVariable(*Scope, IV); + DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); @@ -1038,7 +1019,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, for (const DILocalVariable *DV : SP->getVariables()) { if (Processed.insert(InlinedVariable(DV, nullptr)).second) if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) - createConcreteVariable(*Scope, InlinedVariable(DV, nullptr)); + createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr)); } } @@ -1229,12 +1210,12 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { for (const DILocalVariable *DV : SP->getVariables()) { if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } - constructAbstractSubprogramScopeDIE(AScope); + constructAbstractSubprogramScopeDIE(TheCU, AScope); } ProcessedSPNodes.insert(SP); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 8a96e7867b6e..b9c5aa9ffb23 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -210,7 +210,6 @@ class DwarfDebug : public DebugHandlerBase { DenseMap<const MCSymbol *, uint64_t> SymSize; /// Collection of abstract variables. 
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists @@ -313,20 +312,16 @@ class DwarfDebug : public DebugHandlerBase { typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; - /// Find abstract variable associated with Var. - DbgVariable *getExistingAbstractVariable(InlinedVariable IV, - const DILocalVariable *&Cleansed); - DbgVariable *getExistingAbstractVariable(InlinedVariable IV); - void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(InlinedVariable Var, + void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, + void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var, const MDNode *Scope); - DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); + DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU, + LexicalScope &Scope, InlinedVariable IV); /// Construct a DIE for this abstract scope. - void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); void finishVariableDefinitions(); @@ -446,7 +441,8 @@ class DwarfDebug : public DebugHandlerBase { const DbgValueHistoryMap::InstrRanges &Ranges); /// Collect variable information from the side table maintained by MF. - void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P); + void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, + DenseSet<InlinedVariable> &P); protected: /// Gather pre-function debug information. @@ -518,6 +514,8 @@ public: /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } + bool shareAcrossDWOCUs() const; + /// Returns the Dwarf Version. uint16_t getDwarfVersion() const; diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index d4d2ed277274..54924e9806ed 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -53,6 +53,7 @@ class DwarfFile { // Collection of abstract subprogram DIEs. DenseMap<const MDNode *, DIE *> AbstractSPDies; + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead @@ -105,6 +106,9 @@ public: DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { return AbstractSPDies; } + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() { + return AbstractVariables; + } void insertDIE(const MDNode *TypeMD, DIE *Die) { DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 8d25def7772c..667afbb450bd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -173,7 +173,7 @@ int64_t DwarfUnit::getDefaultLowerBound() const { } /// Check whether the DIE for this MDNode can be shared across CUs. -static bool isShareableAcrossCUs(const DINode *D) { +bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // When the MDNode can be part of the type system, the DIE can be shared // across CUs. 
// Combining type units and cross-CU DIE sharing is lower value (since @@ -181,6 +181,8 @@ static bool isShareableAcrossCUs(const DINode *D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return false; return (isa<DIType>(D) || (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && !GenerateDwarfTypeUnits; @@ -645,7 +647,7 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { addString(Die, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + GlobalValue::dropLLVMManglingEscape(LinkageName)); } void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 8fc841703e23..7acad2cbd89f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -65,7 +65,7 @@ public: //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a /// source file. - class DwarfUnit : public DIEUnit { +class DwarfUnit : public DIEUnit { protected: /// MDNode for the compile unit. const DICompileUnit *CUNode; @@ -103,6 +103,9 @@ protected: bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); + bool shareAcrossDWOCUs() const; + bool isShareableAcrossCUs(const DINode *D) const; + public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 704f0ac2f191..815658bfb637 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -101,7 +101,7 @@ void WinException::beginFunction(const MachineFunction *MF) { // functions may still refer to it. const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); } shouldEmitLSDA = hasEHFunclets; @@ -174,7 +174,7 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, // their funclet entry block's number. const MachineFunction *MF = MBB->getParent(); const Function *F = MF->getFunction(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCContext &Ctx = MF->getContext(); StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + @@ -252,7 +252,7 @@ void WinException::endFunclet() { !CurrentFuncletEntry->isCleanupFuncletEntry()) { // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. 
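
The getRealLinkageName → dropLLVMManglingEscape rename recurs throughout this patch. The helper itself lives in include/llvm/IR/GlobalValue.h rather than in this diff; a minimal sketch of its behavior, assuming the usual '\01' escape convention:

```cpp
#include "llvm/ADT/StringRef.h"
using llvm::StringRef;

// Sketch of GlobalValue::dropLLVMManglingEscape's behavior: drop the
// leading '\01' escape that tells the mangler to emit the name verbatim.
static StringRef dropLLVMManglingEscape(StringRef Name) {
  if (!Name.empty() && Name[0] == '\01')
    return Name.substr(1);
  return Name;
}
```
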
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); @@ -536,7 +536,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { // Emit a label assignment with the SEH frame offset so we can use it for // llvm.x86.seh.recoverfp. StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = @@ -635,7 +635,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { auto &OS = *Asm->OutStreamer; const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; @@ -942,7 +942,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; const Function *F = MF->getFunction(); - StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 9c19a4fd3c3e..17e6be05eb42 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -47,8 +47,7 @@ namespace { bool runOnFunction(Function &F) override; private: - bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, - bool IsStore, bool IsLoad); + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); bool tryExpandAtomicLoad(LoadInst *LI); @@ -224,22 +223,16 @@ bool AtomicExpand::runOnFunction(Function &F) { if (TLI->shouldInsertFencesForAtomic(I)) { auto FenceOrdering = AtomicOrdering::Monotonic; - bool IsStore, IsLoad; if (LI && isAcquireOrStronger(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(AtomicOrdering::Monotonic); - IsStore = false; - IsLoad = true; } else if (SI && isReleaseOrStronger(SI->getOrdering())) { FenceOrdering = SI->getOrdering(); SI->setOrdering(AtomicOrdering::Monotonic); - IsStore = true; - IsLoad = false; } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || isAcquireOrStronger(RMWI->getOrdering()))) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); - IsStore = IsLoad = true; } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()))) { @@ -250,11 +243,10 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = CASI->getSuccessOrdering(); CASI->setSuccessOrdering(AtomicOrdering::Monotonic); CASI->setFailureOrdering(AtomicOrdering::Monotonic); - IsStore = IsLoad = true; } if (FenceOrdering != AtomicOrdering::Monotonic) { - MadeChange |= 
bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + MadeChange |= bracketInstWithFences(I, FenceOrdering); } } @@ -320,13 +312,12 @@ bool AtomicExpand::runOnFunction(Function &F) { return MadeChange; } -bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, - bool IsStore, bool IsLoad) { +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { IRBuilder<> Builder(I); - auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); + auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); - auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); + auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); // The trailing fence is emitted before the instruction instead of after // because there is no easy way of setting Builder insertion point after // an instruction. So we must erase it from the BB, and insert it back @@ -1048,8 +1039,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitLeadingFence(Builder, CI, SuccessOrder); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1064,8 +1054,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasingStoreBB); if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitLeadingFence(Builder, CI, SuccessOrder); Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); @@ -1094,8 +1083,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // necessary. 
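
The fence hunks above all follow the new emitLeadingFence/emitTrailingFence signatures, which take the atomic instruction itself rather than IsStore/IsLoad flags, so targets can inspect the operation directly. A minimal sketch of the bracketing, with moveAfter standing in for the erase-and-reinsert dance the pass actually performs:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Minimal sketch of bracketInstWithFences after this patch: the target
// decides which fences (if any) to emit by inspecting I itself.
static bool bracketWithFences(const TargetLowering *TLI, Instruction *I,
                              AtomicOrdering Order) {
  IRBuilder<> Builder(I);
  Instruction *Leading = TLI->emitLeadingFence(Builder, I, Order);
  Instruction *Trailing = TLI->emitTrailingFence(Builder, I, Order);
  // IRBuilder has no "insert after", so the trailing fence is created
  // before I and then moved past it.
  if (Trailing)
    Trailing->moveAfter(I);
  return Leading || Trailing;
}
```
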
Builder.SetInsertPoint(SuccessBB); if (ShouldInsertFencesForAtomic) - TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitTrailingFence(Builder, CI, SuccessOrder); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); @@ -1107,8 +1095,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(FailureBB); if (ShouldInsertFencesForAtomic) - TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 26da748fa244..55a27e2fb79e 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + ExpandReductions.cpp FaultMaps.cpp FEntryInserter.cpp FuncletLayout.cpp @@ -48,6 +49,7 @@ add_llvm_library(LLVMCodeGen LivePhysRegs.cpp LiveRangeCalc.cpp LiveRangeEdit.cpp + LiveRangeShrink.cpp LiveRegMatrix.cpp LiveRegUnits.cpp LiveStackAnalysis.cpp @@ -118,6 +120,7 @@ add_llvm_library(LLVMCodeGen SafeStack.cpp SafeStackColoring.cpp SafeStackLayout.cpp + ScalarizeMaskedMemIntrin.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 3fc12ccc3b60..4d30c6574b12 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -43,6 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); + initializeLiveRangeShrinkPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); @@ -79,7 +80,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); - initializeSafeStackPass(Registry); + initializeSafeStackLegacyPassPass(Registry); + initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c6c93811a0f9..f2e024c5e3bd 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -295,7 +295,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (PSI->isFunctionHotInCallGraph(&F)) F.setSectionPrefix(".hot"); else if (PSI->isFunctionColdInCallGraph(&F)) - F.setSectionPrefix(".cold"); + F.setSectionPrefix(".unlikely"); } /// This optimization identifies DIV instructions that can be @@ -1549,519 +1549,6 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// Translate a masked load intrinsic like -// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, -// <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.load, label %else -// -//cond.load: ; preds = %0 -// %4 = getelementptr i32* %1, i32 0 -// %5 = load i32* %4 -// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 -// br 
label %else -// -//else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] -// %7 = extractelement <16 x i1> %mask, i32 1 -// %8 = icmp eq i1 %7, true -// br i1 %8, label %cond.load1, label %else2 -// -//cond.load1: ; preds = %else -// %9 = getelementptr i32* %1, i32 1 -// %10 = load i32* %9 -// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 -// br label %else2 -// -//else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] -// %12 = extractelement <16 x i1> %mask, i32 2 -// %13 = icmp eq i1 %12, true -// br i1 %13, label %cond.load4, label %else5 -// -static void scalarizeMaskedLoad(CallInst *CI) { - Value *Ptr = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast<VectorType>(CI->getType()); - assert(VecType && "Unexpected return type of masked load intrinsic"); - - Type *EltTy = CI->getType()->getVectorElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa<Constant>(Mask) && - cast<Constant>(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - - if (isa<ConstantVector>(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_load = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // 
%EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); -} - -// Translate a masked store intrinsic, like -// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, -// <16 x i1> %mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.store, label %else -// -// cond.store: ; preds = %0 -// %4 = extractelement <16 x i32> %val, i32 0 -// %5 = getelementptr i32* %1, i32 0 -// store i32 %4, i32* %5 -// br label %else -// -// else: ; preds = %0, %cond.store -// %6 = extractelement <16 x i1> %mask, i32 1 -// %7 = icmp eq i1 %6, true -// br i1 %7, label %cond.store1, label %else2 -// -// cond.store1: ; preds = %else -// %8 = extractelement <16 x i32> %val, i32 1 -// %9 = getelementptr i32* %1, i32 1 -// store i32 %8, i32* %9 -// br label %else2 -// . . . -static void scalarizeMaskedStore(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast<VectorType>(Src->getType()); - assert(VecType && "Unexpected data type in masked store intrinsic"); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa<Constant>(Mask) && - cast<Constant>(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Builder.CreateAlignedStore(Src, Ptr, AlignVal); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. 
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - if (isa<ConstantVector>(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - } - CI->eraseFromParent(); - return; - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_store, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - -// Translate a masked gather intrinsic like -// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, -// <16 x i1> %Mask, <16 x i32> %Src) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> %Mask, i32 0 -// % ToLoad0 = icmp eq i1 % Mask0, true -// br i1 % ToLoad0, label %cond.load, label %else -// -// cond.load: -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// % Load0 = load i32, i32* % Ptr0, align 4 -// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 -// br label %else -// -// else: -// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] -// % Mask1 = extractelement <16 x i1> %Mask, i32 1 -// % ToLoad1 = icmp eq i1 % Mask1, true -// br i1 % ToLoad1, label %cond.load1, label %else2 -// -// cond.load1: -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// % Load1 = load i32, i32* % Ptr1, align 4 -// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 -// br label %else2 -// . . . 
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src -// ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI) { - Value *Ptrs = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - VectorType *VecType = dyn_cast<VectorType>(CI->getType()); - - assert(VecType && "Unexpected return type of masked load intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - bool IsConstMask = isa<ConstantVector>(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx), - "Res" + Twine(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 - // %ToLoad1 = icmp eq i1 %Mask1, true - // br i1 %ToLoad1, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToLoad" + Twine(Idx)); - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), - "Res" + Twine(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - 
CI->eraseFromParent(); -} - -// Translate a masked scatter intrinsic, like -// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, -// <16 x i1> %Mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set. -// -// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> % Mask, i32 0 -// % ToStore0 = icmp eq i1 % Mask0, true -// br i1 %ToStore0, label %cond.store, label %else -// -// cond.store: -// % Elt0 = extractelement <16 x i32> %Src, i32 0 -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* % Ptr0, align 4 -// br label %else -// -// else: -// % Mask1 = extractelement <16 x i1> % Mask, i32 1 -// % ToStore1 = icmp eq i1 % Mask1, true -// br i1 % ToStore1, label %cond.store1, label %else2 -// -// cond.store1: -// % Elt1 = extractelement <16 x i32> %Src, i32 1 -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 % Elt1, i32* % Ptr1, align 4 -// br label %else2 -// . . . -static void scalarizeMaskedScatter(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptrs = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - assert(isa<VectorType>(Src->getType()) && - "Unexpected data type in masked scatter intrinsic"); - assert(isa<VectorType>(Ptrs->getType()) && - isa<PointerType>(Ptrs->getType()->getVectorElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); - unsigned VectorWidth = Src->getType()->getVectorNumElements(); - - // Shorten the way if the mask is a vector of constants. 
- bool IsConstMask = isa<ConstantVector>(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - } - CI->eraseFromParent(); - return; - } - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx - // % ToStore = icmp eq i1 % Mask1, true - // br i1 % ToStore, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = - Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToStore" + Twine(Idx)); - - // Create "cond" block - // - // % Elt1 = extractelement <16 x i32> %Src, i32 1 - // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 % Elt1, i32* % Ptr1 - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - /// If counting leading or trailing zeros is an expensive operation and a zero /// input is defined, add a check for zero to avoid calling the intrinsic. 
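
The four scalarize* helpers deleted here are not dropped from the tree: the CMakeLists.txt hunk in this patch adds ScalarizeMaskedMemIntrin.cpp, and CodeGen.cpp registers the corresponding pass, so the expansion moves out of CodeGenPrepare into a dedicated IR pass. A condensed sketch of the dispatch that pass performs, assuming the same legality checks removed from optimizeCallInst below (scalarizeMaskedLoad/Store bodies as in the removed code):

```cpp
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static void scalarizeMaskedLoad(CallInst *CI);   // bodies as removed above
static void scalarizeMaskedStore(CallInst *CI);

// Condensed sketch: expand only the masked intrinsics the target cannot
// lower natively, mirroring the legality checks this patch removes from
// CodeGenPrepare::optimizeCallInst.
static bool expandMaskedIntrinsic(IntrinsicInst *II,
                                  const TargetTransformInfo *TTI) {
  switch (II->getIntrinsicID()) {
  case Intrinsic::masked_load:
    if (TTI->isLegalMaskedLoad(II->getType()))
      return false;
    scalarizeMaskedLoad(II);   // per-element load guarded by a mask test
    return true;
  case Intrinsic::masked_store:
    if (TTI->isLegalMaskedStore(II->getArgOperand(0)->getType()))
      return false;
    scalarizeMaskedStore(II);
    return true;
  default:
    return false;
  }
}
```
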
/// @@ -2242,39 +1729,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return true; } - case Intrinsic::masked_load: { - // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType())) { - scalarizeMaskedLoad(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { - scalarizeMaskedStore(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_gather: { - if (!TTI->isLegalMaskedGather(CI->getType())) { - scalarizeMaskedGather(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_scatter: { - if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { - scalarizeMaskedScatter(CI); - ModifiedDT = true; - return true; - } - return false; - } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index ab2382e2db6d..e860906043dd 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -142,8 +142,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); - if (SrcMO.getReg() == DstMO.getReg()) { - DEBUG(dbgs() << "identity copy: " << *MI); + bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg()); + if (IdentityCopy || SrcMO.isUndef()) { + DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. if (SrcMO.isUndef() || MI->getNumOperands() > 2) { diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp new file mode 100644 index 000000000000..a40ea28056dd --- /dev/null +++ b/lib/CodeGen/ExpandReductions.cpp @@ -0,0 +1,167 @@ +//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR expansion for reduction intrinsics, allowing targets +// to enable the experimental intrinsics until just before codegen. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +unsigned getOpcode(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + return Instruction::FAdd; + case Intrinsic::experimental_vector_reduce_fmul: + return Instruction::FMul; + case Intrinsic::experimental_vector_reduce_add: + return Instruction::Add; + case Intrinsic::experimental_vector_reduce_mul: + return Instruction::Mul; + case Intrinsic::experimental_vector_reduce_and: + return Instruction::And; + case Intrinsic::experimental_vector_reduce_or: + return Instruction::Or; + case Intrinsic::experimental_vector_reduce_xor: + return Instruction::Xor; + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return Instruction::ICmp; + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + return Instruction::FCmp; + default: + llvm_unreachable("Unexpected ID"); + } +} + +RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_smax: + return RecurrenceDescriptor::MRK_SIntMax; + case Intrinsic::experimental_vector_reduce_smin: + return RecurrenceDescriptor::MRK_SIntMin; + case Intrinsic::experimental_vector_reduce_umax: + return RecurrenceDescriptor::MRK_UIntMax; + case Intrinsic::experimental_vector_reduce_umin: + return RecurrenceDescriptor::MRK_UIntMin; + case Intrinsic::experimental_vector_reduce_fmax: + return RecurrenceDescriptor::MRK_FloatMax; + case Intrinsic::experimental_vector_reduce_fmin: + return RecurrenceDescriptor::MRK_FloatMin; + default: + return RecurrenceDescriptor::MRK_Invalid; + } +} + +bool expandReductions(Function &F, const TargetTransformInfo *TTI) { + bool Changed = false; + SmallVector<IntrinsicInst*, 4> Worklist; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (auto II = dyn_cast<IntrinsicInst>(&*I)) + Worklist.push_back(II); + + for (auto *II : Worklist) { + IRBuilder<> Builder(II); + Value *Vec = nullptr; + auto ID = II->getIntrinsicID(); + auto MRK = RecurrenceDescriptor::MRK_Invalid; + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + // FMFs must be attached to the call, otherwise it's an ordered reduction + // and it can't be handled by generating this shuffle sequence. + // TODO: Implement scalarization of ordered reductions here for targets + // without native support. 
+ if (!II->getFastMathFlags().unsafeAlgebra()) + continue; + Vec = II->getArgOperand(1); + break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + Vec = II->getArgOperand(0); + MRK = getMRK(ID); + break; + default: + continue; + } + if (!TTI->shouldExpandReduction(II)) + continue; + auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + II->replaceAllUsesWith(Rdx); + II->eraseFromParent(); + Changed = true; + } + return Changed; +} + +class ExpandReductions : public FunctionPass { +public: + static char ID; + ExpandReductions() : FunctionPass(ID) { + initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return expandReductions(F, TTI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.setPreservesCFG(); + } +}; +} + +char ExpandReductions::ID; +INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) + +FunctionPass *llvm::createExpandReductionsPass() { + return new ExpandReductions(); +} + +PreservedAnalyses ExpandReductionsPass::run(Function &F, + FunctionAnalysisManager &AM) { + const auto &TTI = AM.getResult<TargetIRAnalysis>(F); + if (!expandReductions(F, &TTI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet<CFGAnalyses>(); + return PA; +} diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index eaf4056e47ea..4d4591042296 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -162,7 +162,7 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return std::get<0>(getAction(MI, MRI)) == Legal; } -LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, +Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const { switch(Action) { default: @@ -174,20 +174,20 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, return Aspect.Type; case NarrowScalar: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + [](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); } case WidenScalar: { - return findLegalType(Aspect, [&](LLT Ty) -> LLT { + return findLegalType(Aspect, [](LLT Ty) -> LLT { return Ty.getSizeInBits() < 8 ? 
LLT::scalar(8) : Ty.doubleScalarSize(); }); } case FewerElements: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + [](LLT Ty) -> LLT { return Ty.halfElements(); }); } case MoreElements: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + [](LLT Ty) -> LLT { return Ty.doubleElements(); }); } } } diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 7248f50945d0..2eb3cdee694d 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -204,12 +204,8 @@ uint64_t RegBankSelect::getRepairCost( // TODO: use a dedicated constant for ImpossibleCost. if (Cost != UINT_MAX) return Cost; - assert(!TPC->isGlobalISelAbortEnabled() && - "Legalization not available yet"); // Return the legalization cost of that repairing. } - assert(!TPC->isGlobalISelAbortEnabled() && - "Complex repairing not implemented yet"); return UINT_MAX; } @@ -452,6 +448,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Sums up the repairing cost of MO at each insertion point. uint64_t RepairCost = getRepairCost(MO, ValMapping); + + // This is an impossible to repair cost. + if (RepairCost == UINT_MAX) + continue; + // Bias used for splitting: 5%. const uint64_t PercentageForBias = 5; uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100; diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 3c93f8123b0d..254bdf10d804 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -110,3 +110,11 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, return None; } + +const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI) { + MachineInstr *MI = MRI.getVRegDef(VReg); + if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) + return nullptr; + return MI->getOperand(1).getFPImm(); +} diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 37fe41582333..628d599a3cc7 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1318,7 +1318,8 @@ static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) { return false; PI = I++; } - return true; + // Finally see if the last I is indeed a successor to PI. + return PI->isSuccessor(&*I); } /// Invalidate predecessor BB info so it would be re-analyzed to determine if it @@ -1587,22 +1588,32 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } + // To be able to insert code freely at the end of BBI we sometimes remove + // the branch from BBI to NextMBB temporarily. Remember if this happened. + bool RemovedBranchToNextMBB = false; if (CvtMBB.pred_size() > 1) { BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - // RemoveExtraEdges won't work if the block has an unanalyzable branch, so - // explicitly remove CvtBBI as a successor. + // Keep the CFG updated. BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Now merge the entry of the triangle with the true block. + // Remove the branch from the entry of the triangle to NextBB to be able to + // do the merge below. 
Keep the CFG updated, but remember we removed the
+    // branch since we do want to execute NextMBB, either by introducing a
+    // branch to it again, or merging it into the entry block.
+    // How it's handled is decided further down.
     BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+    BBI.BB->removeSuccessor(&NextMBB, true);
+    RemovedBranchToNextMBB = true;
+
+    // Now merge the entry of the triangle with the true block.
     MergeBlocks(BBI, *CvtBBI, false);
   }
 
@@ -1640,12 +1651,19 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
   // block. By not merging them, we make it possible to iteratively
   // ifcvt the blocks.
   if (!HasEarlyExit &&
-      NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
+      // We might have removed BBI from NextMBB's predecessor list above but
+      // we want it to be there, so consider that too.
+      (NextMBB.pred_size() == (RemovedBranchToNextMBB ? 0 : 1)) &&
+      !NextBBI->HasFallThrough &&
       !NextMBB.hasAddressTaken()) {
+    // We will merge NextBBI into BBI, and thus remove the current
+    // fallthrough from BBI into CvtBBI.
+    BBI.BB->removeSuccessor(&CvtMBB, true);
     MergeBlocks(BBI, *NextBBI);
     FalseBBDead = true;
   } else {
     InsertUncondBranch(*BBI.BB, NextMBB, TII);
+    BBI.BB->addSuccessor(&NextMBB);
     BBI.HasFallThrough = false;
   }
   // Mixed predicated and unpredicated code. This cannot be iteratively
@@ -1653,8 +1671,6 @@
     IterIfcvt = false;
   }
 
-  RemoveExtraEdges(BBI);
-
   // Update block info. BB can be iteratively if-converted.
   if (!IterIfcvt)
     BBI.IsDone = true;
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
new file mode 100644
index 000000000000..00182e2c779f
--- /dev/null
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -0,0 +1,211 @@
+//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+///===---------------------------------------------------------------------===//
+///
+/// \file
+/// This pass moves instructions close to the definitions of their operands to
+/// shrink the live ranges of the defining instructions. The code motion is
+/// limited to within the basic block. The moved instruction should have a
+/// single def, and should use more than one register, each of which is the
+/// only use of its defining instruction.
+///
+///===---------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lrshrink"
+
+STATISTIC(NumInstrsHoistedToShrinkLiveRange,
+          "Number of instructions hoisted to shrink live range.");
+
+using namespace llvm;
+
+namespace {
+class LiveRangeShrink : public MachineFunctionPass {
+public:
+  static char ID;
+
+  LiveRangeShrink() : MachineFunctionPass(ID) {
+    initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "Live Range Shrink"; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
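+
+// For illustration (schematic machine IR, virtual register names are
+// hypothetical): given
+//
+//   %a = ...          ; %a's only use is the ADD below
+//   %b = ...          ; %b's only use is the ADD below
+//   ...               ; many unrelated instructions
+//   %c = ADD %a, %b
+//
+// the pass hoists the ADD to just below the later of the two defs:
+//
+//   %a = ...
+//   %b = ...
+//   %c = ADD %a, %b
+//   ...
+//
+// shrinking the live ranges of both %a and %b.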
+
+char LiveRangeShrink::ID = 0;
+char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
+
+INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
+                false)
+namespace {
+typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+/// Returns \p New if it is dominated by \p Old, otherwise returns \p Old.
+/// \p M maps each instruction to its dominating order, such that
+/// M[A] > M[B] guarantees that A is dominated by B.
+/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null,
+/// return \p New.
+MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
+                                       const InstOrderMap &M) {
+  auto NewIter = M.find(&New);
+  if (NewIter == M.end())
+    return Old;
+  if (Old == nullptr)
+    return &New;
+  unsigned OrderOld = M.find(Old)->second;
+  unsigned OrderNew = NewIter->second;
+  if (OrderOld != OrderNew)
+    return OrderOld < OrderNew ? &New : Old;
+  // OrderOld == OrderNew, so iterate forward from Old to see if it can reach
+  // New; if so, New is dominated by Old.
+  for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
+       I = I->getNextNode())
+    if (I == &New)
+      return &New;
+  return Old;
+}
+
+/// Builds the instruction-to-dominating-order map \p M by traversing
+/// from instruction \p Start.
+void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+  M.clear();
+  unsigned i = 0;
+  for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
+    M[&I] = i++;
+}
+} // end anonymous namespace
+
+bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(*MF.getFunction()))
+    return false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+  InstOrderMap IOM;
+  // Map from register to instruction order (value of IOM) where the
+  // register is used last. When moving instructions up, we need to
+  // make sure all their defs (including dead defs) will not cross their
+  // last uses when moving up.
+  DenseMap<unsigned, unsigned> UseMap;
+
+  for (MachineBasicBlock &MBB : MF) {
+    if (MBB.empty())
+      continue;
+    bool SawStore = false;
+    BuildInstOrderMap(MBB.begin(), IOM);
+    UseMap.clear();
+
+    for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
+      MachineInstr &MI = *Next;
+      ++Next;
+      if (MI.isPHI() || MI.isDebugValue())
+        continue;
+      if (MI.mayStore())
+        SawStore = true;
+
+      unsigned CurrentOrder = IOM[&MI];
+      unsigned Barrier = 0;
+      for (const MachineOperand &MO : MI.operands()) {
+        if (!MO.isReg() || MO.isDebug())
+          continue;
+        if (MO.isUse())
+          UseMap[MO.getReg()] = CurrentOrder;
+        else if (MO.isDead() && UseMap.count(MO.getReg()))
+          // Barrier is the last instruction where MO gets used. MI should not
+          // be moved above Barrier.
+          Barrier = std::max(Barrier, UseMap[MO.getReg()]);
+      }
+
+      if (!MI.isSafeToMove(nullptr, SawStore)) {
+        // If MI has side effects, it should become a barrier for code motion.
+        // IOM is rebuilt from the next instruction to prevent later
+        // instructions from being moved before this MI.
+        if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+          BuildInstOrderMap(Next, IOM);
+          SawStore = false;
+        }
+        continue;
+      }
+
+      const MachineOperand *DefMO = nullptr;
+      MachineInstr *Insert = nullptr;
+
+      // Number of live-ranges that will be shortened. We do not count
+      // live-ranges that are defined by a COPY as they could be coalesced
+      // later.
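+      // For example (schematic, register names hypothetical):
+      //
+      //   %x = COPY %y      ; not counted: the COPY may be coalesced away
+      //   %z = LOAD ...     ; counted if this MI is the only use of %z
+      //   %w = ADD %x, %z   ; the MI being considered for hoisting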
+ unsigned NumEligibleUse = 0; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isDead() || MO.isDebug()) + continue; + unsigned Reg = MO.getReg(); + // Do not move the instruction if it def/uses a physical register, + // unless it is a constant physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI.isConstantPhysReg(Reg)) { + Insert = nullptr; + break; + } + if (MO.isDef()) { + // Do not move if there is more than one def. + if (DefMO) { + Insert = nullptr; + break; + } + DefMO = &MO; + } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg)) { + MachineInstr &DefInstr = *MRI.def_instr_begin(Reg); + if (!DefInstr.isCopy()) + NumEligibleUse++; + Insert = FindDominatedInstruction(DefInstr, Insert, IOM); + } else { + Insert = nullptr; + break; + } + } + // Move the instruction when # of shrunk live range > 1. + if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { + MachineBasicBlock::iterator I = std::next(Insert->getIterator()); + // Skip all the PHI and debug instructions. + while (I != MBB.end() && (I->isPHI() || I->isDebugValue())) + I = std::next(I); + if (I == MI.getIterator()) + continue; + + // Update the dominator order to be the same as the insertion point. + // We do this to maintain a non-decreasing order without need to update + // all instruction orders after the insertion point. + unsigned NewOrder = IOM[&*I]; + IOM[&MI] = NewOrder; + NumInstrsHoistedToShrinkLiveRange++; + + // Find MI's debug value following MI. + MachineBasicBlock::iterator EndIter = std::next(MI.getIterator()); + if (MI.getOperand(0).isReg()) + for (; EndIter != MBB.end() && EndIter->isDebugValue() && + EndIter->getOperand(0).isReg() && + EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg(); + ++EndIter, ++Next) + IOM[&*EndIter] = NewOrder; + MBB.splice(I, &MBB, MI.getIterator(), EndIter); + } + } + } + return false; +} diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 3568b0294ad9..a9aec926115a 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -767,7 +767,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - SmallSet<unsigned, 16> Defs, Kills; + DenseSet<unsigned> Defs, Kills; MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); for (; BBI != BBE && BBI->isPHI(); ++BBI) { diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 4cfc128a8c1d..5003115a770f 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacementThreshold( "that won't conflict."), cl::init(2), cl::Hidden); +// Heuristic for aggressive tail duplication. +static cl::opt<unsigned> TailDupPlacementAggressiveThreshold( + "tail-dup-placement-aggressive-threshold", + cl::desc("Instruction cutoff for aggressive tail duplication during " + "layout. Used at -O3. Tail merging during layout is forced to " + "have a threshold that won't conflict."), cl::init(3), + cl::Hidden); + // Heuristic for tail duplication. static cl::opt<unsigned> TailDupPlacementPenalty( "tail-dup-placement-penalty", @@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { assert(BlockToChain.empty()); assert(ComputedEdges.empty()); + unsigned TailDupSize = TailDupPlacementThreshold; + // If only the aggressive threshold is explicitly set, use it. 
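+  // Taken together with the -O3 logic below, the intended behaviour appears
+  // to be: with neither flag set, the regular threshold below -O3 and the
+  // aggressive threshold at -O3; with only the regular flag set, always the
+  // regular value; with only the aggressive flag set, always the aggressive
+  // value; with both set, regular below -O3 and aggressive at -O3.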
+  if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
+      TailDupPlacementThreshold.getNumOccurrences() == 0)
+    TailDupSize = TailDupPlacementAggressiveThreshold;
+
+  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+  // For aggressive optimization, we can adjust some thresholds to be less
+  // conservative.
+  if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+    // At -O3 we should be more willing to copy blocks for tail duplication,
+    // even though it increases size pressure. Do this unless only the regular
+    // threshold is explicitly set.
+    if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
+        TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
+      TailDupSize = TailDupPlacementAggressiveThreshold;
+  }
+
   if (TailDupPlacement) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
-    unsigned TailDupSize = TailDupPlacementThreshold;
     if (MF.getFunction()->optForSize())
       TailDupSize = 1;
     TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   buildCFGChains();
 
   // Changing the layout can create new tail merging opportunities.
-  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
   // TailMerge can create jump into if branches that make CFG irreducible for
   // HW that requires structured CFG.
   bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
                          BranchFoldPlacement;
   // No tail merging opportunities if the block number is less than four.
   if (MF.size() > 3 && EnableTailMerge) {
-    unsigned TailMergeSize = TailDupPlacementThreshold + 1;
+    unsigned TailMergeSize = TailDupSize + 1;
     BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
                     *MBPI, TailMergeSize);
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index bfb2cde030dc..ab433273b189 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -2063,12 +2063,12 @@ void MachineVerifier::verifyStackFrame() {
       if (I.getOpcode() == FrameSetupOpcode) {
         if (BBState.ExitIsSetup)
           report("FrameSetup is after another FrameSetup", &I);
-        BBState.ExitValue -= TII->getFrameSize(I);
+        BBState.ExitValue -= TII->getFrameTotalSize(I);
         BBState.ExitIsSetup = true;
       }
 
       if (I.getOpcode() == FrameDestroyOpcode) {
-        int Size = TII->getFrameSize(I);
+        int Size = TII->getFrameTotalSize(I);
         if (!BBState.ExitIsSetup)
           report("FrameDestroy is not after a FrameSetup", &I);
         int AbsSPAdj = BBState.ExitValue < 0 ?
-BBState.ExitValue : diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c67a25b888bf..db2264b2439d 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -34,7 +34,7 @@ #include <algorithm> using namespace llvm; -#define DEBUG_TYPE "phielim" +#define DEBUG_TYPE "phi-node-elimination" static cl::opt<bool> DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index bf44ee8453b6..1803ea2b9249 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -3214,7 +3214,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { CurrList(WorkList.begin() + PrevSize, WorkList.end()); if (copyCoalesceWorkList(CurrList)) WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), - (MachineInstr*)nullptr), WorkList.end()); + nullptr), WorkList.end()); } void RegisterCoalescer::coalesceLocals() { diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 35db30f89976..0635e5c0a63c 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -62,10 +62,9 @@ void RegScavenger::init(MachineBasicBlock &MBB) { } this->MBB = &MBB; - for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { - I->Reg = 0; - I->Restore = nullptr; + for (ScavengedInfo &SI : Scavenged) { + SI.Reg = 0; + SI.Restore = nullptr; } Tracking = false; diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index 7fa379d80c6c..08b3d345f689 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -19,6 +19,7 @@ #include "SafeStackLayout.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -92,11 +93,11 @@ public: /// determined statically), and the unsafe stack, which contains all /// local variables that are accessed in ways that we can't prove to /// be safe. -class SafeStack : public FunctionPass { - const TargetMachine *TM; - const TargetLoweringBase *TL; - const DataLayout *DL; - ScalarEvolution *SE; +class SafeStack { + Function &F; + const TargetLoweringBase &TL; + const DataLayout &DL; + ScalarEvolution &SE; Type *StackPtrTy; Type *IntPtrTy; @@ -171,33 +172,21 @@ class SafeStack : public FunctionPass { uint64_t AllocaSize); public: - static char ID; // Pass identification, replacement for typeid. 
- SafeStack(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) { - initializeSafeStackPass(*PassRegistry::getPassRegistry()); - } - SafeStack() : SafeStack(nullptr) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<ScalarEvolutionWrapperPass>(); - } - - bool doInitialization(Module &M) override { - DL = &M.getDataLayout(); - - StackPtrTy = Type::getInt8PtrTy(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - - return false; - } - - bool runOnFunction(Function &F) override; -}; // class SafeStack + SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL, + ScalarEvolution &SE) + : F(F), TL(TL), DL(DL), SE(SE), + StackPtrTy(Type::getInt8PtrTy(F.getContext())), + IntPtrTy(DL.getIntPtrType(F.getContext())), + Int32Ty(Type::getInt32Ty(F.getContext())), + Int8Ty(Type::getInt8Ty(F.getContext())) {} + + // Run the transformation on the associated function. + // Returns whether the function was changed. + bool run(); +}; uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { - uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType()); + uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { auto C = dyn_cast<ConstantInt>(AI->getArraySize()); if (!C) @@ -209,11 +198,11 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, const Value *AllocaPtr, uint64_t AllocaSize) { - AllocaOffsetRewriter Rewriter(*SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr)); + AllocaOffsetRewriter Rewriter(SE, AllocaPtr); + const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); - uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType()); - ConstantRange AccessStartRange = SE->getUnsignedRange(Expr); + uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType()); + ConstantRange AccessStartRange = SE.getUnsignedRange(Expr); ConstantRange SizeRange = ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize)); ConstantRange AccessRange = AccessStartRange.add(SizeRange); @@ -226,8 +215,8 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, << *AllocaPtr << "\n" << " Access " << *Addr << "\n" << " SCEV " << *Expr - << " U: " << SE->getUnsignedRange(Expr) - << ", S: " << SE->getSignedRange(Expr) << "\n" + << " U: " << SE.getUnsignedRange(Expr) + << ", S: " << SE.getSignedRange(Expr) << "\n" << " Range " << AccessRange << "\n" << " AllocaRange " << AllocaRange << "\n" << " " << (Safe ? 
"safe" : "unsafe") << "\n"); @@ -266,7 +255,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { switch (I->getOpcode()) { case Instruction::Load: { - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr, + if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr, AllocaSize)) return false; break; @@ -282,7 +271,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { return false; } - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()), + if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getOperand(0)->getType()), AllocaPtr, AllocaSize)) return false; break; @@ -343,7 +332,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { } Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { - Value *StackGuardVar = TL->getIRStackGuard(IRB); + Value *StackGuardVar = TL.getIRStackGuard(IRB); if (!StackGuardVar) StackGuardVar = F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); @@ -390,7 +379,7 @@ void SafeStack::findInsts(Function &F, if (!Arg.hasByValAttr()) continue; uint64_t Size = - DL->getTypeStoreSize(Arg.getType()->getPointerElementType()); + DL.getTypeStoreSize(Arg.getType()->getPointerElementType()); if (IsSafeStackAlloca(&Arg, Size)) continue; @@ -476,19 +465,19 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (StackGuardSlot) { Type *Ty = StackGuardSlot->getAllocatedType(); unsigned Align = - std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); + std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot), Align, SSC.getFullLiveRange()); } for (Argument *Arg : ByValArguments) { Type *Ty = Arg->getType()->getPointerElementType(); - uint64_t Size = DL->getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. // Ensure the object is properly aligned. - unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment()); SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange()); } @@ -501,7 +490,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Ensure the object is properly aligned. unsigned Align = - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()); SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI)); } @@ -539,7 +528,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( unsigned Offset = SSL.getObjectOffset(Arg); Type *Ty = Arg->getType()->getPointerElementType(); - uint64_t Size = DL->getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. @@ -630,7 +619,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false); Type *Ty = AI->getAllocatedType(); - uint64_t TySize = DL->getTypeAllocSize(Ty); + uint64_t TySize = DL.getTypeAllocSize(Ty); Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize)); Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy); @@ -638,7 +627,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( // Align the SP value to satisfy the AllocaInst, type and stack alignments. 
unsigned Align = std::max(
-      std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+      std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
       (unsigned)StackAlignment);
 
   assert(isPowerOf2_32(Align));
@@ -685,25 +674,10 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
   }
 }
 
-bool SafeStack::runOnFunction(Function &F) {
-  DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
-
-  if (!F.hasFnAttribute(Attribute::SafeStack)) {
-    DEBUG(dbgs() << "[SafeStack]     safestack is not requested"
-                    " for this function\n");
-    return false;
-  }
-
-  if (F.isDeclaration()) {
-    DEBUG(dbgs() << "[SafeStack]     function definition"
-                    " is not available\n");
-    return false;
-  }
-
-  if (!TM)
-    report_fatal_error("Target machine is required");
-  TL = TM->getSubtargetImpl(F)->getTargetLowering();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+bool SafeStack::run() {
+  assert(F.hasFnAttribute(Attribute::SafeStack) &&
+         "Can't run SafeStack on a function without the attribute");
+  assert(!F.isDeclaration() && "Can't run SafeStack on a function declaration");
 
   ++NumFunctions;
 
@@ -736,7 +710,7 @@
     ++NumUnsafeStackRestorePointsFunctions;
 
   IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
-  UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB);
+  UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
 
   // Load the current stack pointer (we'll also use it as a base pointer).
   // FIXME: use a dedicated register for it ?
@@ -788,14 +762,70 @@
   return true;
 }
 
+class SafeStackLegacyPass : public FunctionPass {
+  const TargetMachine *TM;
+
+public:
+  static char ID; // Pass identification, replacement for typeid.
+  SafeStackLegacyPass(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {
+    initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  SafeStackLegacyPass() : SafeStackLegacyPass(nullptr) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+  }
+
+  bool runOnFunction(Function &F) override {
+    DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+    if (!F.hasFnAttribute(Attribute::SafeStack)) {
+      DEBUG(dbgs() << "[SafeStack]     safestack is not requested"
+                      " for this function\n");
+      return false;
+    }
+
+    if (F.isDeclaration()) {
+      DEBUG(dbgs() << "[SafeStack]     function definition"
+                      " is not available\n");
+      return false;
+    }
+
+    if (!TM)
+      report_fatal_error("Target machine is required");
+    auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+    if (!TL)
+      report_fatal_error("TargetLowering instance is required");
+
+    auto *DL = &F.getParent()->getDataLayout();
+    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+    auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+    // Compute DT and LI only for functions that have the attribute.
+    // This is only useful because the legacy pass manager doesn't let us
+    // compute analyses lazily.
+    // In the backend pipeline, nothing preserves DT before SafeStack, so we
+    // would otherwise always compute it wastefully, even if there is no
+    // function with the safestack attribute.
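+    // (DT, LI and SE are locals: they are built here and discarded at the
+    // end of this call, so nothing is computed for functions that never
+    // reach this point.)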
+    DominatorTree DT(F);
+    LoopInfo LI(DT);
+
+    ScalarEvolution SE(F, TLI, ACT, DT, LI);
+
+    return SafeStack(F, *TL, *DL, SE).run();
+  }
+};
+
 } // anonymous namespace
 
-char SafeStack::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+char SafeStackLegacyPass::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(SafeStackLegacyPass, "safe-stack",
                          "Safe Stack instrumentation pass", false, false)
-INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+INITIALIZE_TM_PASS_END(SafeStackLegacyPass, "safe-stack",
                        "Safe Stack instrumentation pass", false, false)
 
 FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
-  return new SafeStack(TM);
+  return new SafeStackLegacyPass(TM);
 }
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
new file mode 100644
index 000000000000..dab5b91f50ad
--- /dev/null
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -0,0 +1,660 @@
+//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===//
+//===                            intrinsics                            ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces masked memory intrinsics - when unsupported by the target
+// - with a chain of basic blocks that deal with the elements one by one if the
+// appropriate mask bit is set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scalarize-masked-mem-intrin"
+
+namespace {
+
+class ScalarizeMaskedMemIntrin : public FunctionPass {
+  const TargetTransformInfo *TTI;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) {
+    initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override {
+    return "Scalarize Masked Memory Intrinsics";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+
+private:
+  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+};
+} // namespace
+
+char ScalarizeMaskedMemIntrin::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
+                      "Scalarize unsupported masked memory intrinsics", false,
+                      false)
+INITIALIZE_PASS_END(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
+                    "Scalarize unsupported masked memory intrinsics", false,
+                    false)
+
+FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
+  return new ScalarizeMaskedMemIntrin();
+}
+
+// Translate a masked load intrinsic like
+//   <16 x i32> @llvm.masked.load(<16 x i32>* %addr, i32 align,
+//                                <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, with the elements loaded one by one if the
+// appropriate mask bit is set.
+//
+//  %1 = bitcast i8* %addr to i32*
+//  %2 = extractelement <16 x i1> %mask, i32 0
+//  %3 = icmp eq i1 %2, true
+//  br i1 %3, label %cond.load, label %else
+//
+// cond.load:                                        ; preds = %0
+//  %4 = getelementptr i32* %1, i32 0
+//  %5 = load i32* %4
+//  %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+//  br label %else
+//
+// else:                                             ; preds = %0, %cond.load
+//  %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+//  %7 = extractelement <16 x i1> %mask, i32 1
+//  %8 = icmp eq i1 %7, true
+//  br i1 %8, label %cond.load1, label %else2
+//
+// cond.load1:                                       ; preds = %else
+//  %9 = getelementptr i32* %1, i32 1
+//  %10 = load i32* %9
+//  %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+//  br label %else2
+//
+// else2:                                            ; preds = %else, %cond.load1
+//  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+//  %12 = extractelement <16 x i1> %mask, i32 2
+//  %13 = icmp eq i1 %12, true
+//  br i1 %13, label %cond.load4, label %else5
+//
+static void scalarizeMaskedLoad(CallInst *CI) {
+  Value *Ptr = CI->getArgOperand(0);
+  Value *Alignment = CI->getArgOperand(1);
+  Value *Mask = CI->getArgOperand(2);
+  Value *Src0 = CI->getArgOperand(3);
+
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+  VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+  assert(VecType && "Unexpected return type of masked load intrinsic");
+
+  Type *EltTy = CI->getType()->getVectorElementType();
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  BasicBlock *CondBlock = nullptr;
+  BasicBlock *PrevIfBlock = CI->getParent();
+
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  // Short-cut if the mask is all-true.
+  bool IsAllOnesMask =
+      isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+  if (IsAllOnesMask) {
+    Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+    CI->replaceAllUsesWith(NewI);
+    CI->eraseFromParent();
+    return;
+  }
+
+  // Adjust alignment for the scalar instruction.
+  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+  // Bitcast %addr from i8* to EltTy*
+  Type *NewPtrType =
+      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+  unsigned VectorWidth = VecType->getNumElements();
+
+  Value *UndefVal = UndefValue::get(VecType);
+
+  // The result vector
+  Value *VResult = UndefVal;
+
+  if (isa<ConstantVector>(Mask)) {
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+        continue;
+      Value *Gep =
+          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+      LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+      VResult =
+          Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+    }
+    Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+    CI->replaceAllUsesWith(NewI);
+    CI->eraseFromParent();
+    return;
+  }
+
+  PHINode *Phi = nullptr;
+  Value *PrevPhi = UndefVal;
+
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  %to_load = icmp eq i1 %mask_1, true
+    //  br i1 %to_load, label %cond.load, label %else
+    //
+    if (Idx > 0) {
+      Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+      Phi->addIncoming(VResult, CondBlock);
+      Phi->addIncoming(PrevPhi, PrevIfBlock);
+      PrevPhi = Phi;
+      VResult = Phi;
+    }
+
+    Value *Predicate =
+        Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+                                    ConstantInt::get(Predicate->getType(), 1));
+
+    // Create "cond" block
+    //
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  %Elt = load i32* %EltAddr
+    //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+    //
+    CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+    Builder.SetInsertPoint(InsertPt);
+
+    Value *Gep =
+        Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+    LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+    VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    OldBr->eraseFromParent();
+    PrevIfBlock = IfBlock;
+    IfBlock = NewIfBlock;
+  }
+
+  Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+  Phi->addIncoming(VResult, CondBlock);
+  Phi->addIncoming(PrevPhi, PrevIfBlock);
+  Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+  CI->replaceAllUsesWith(NewI);
+  CI->eraseFromParent();
+}
+
+// Translate a masked store intrinsic, like
+//   void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+//                           <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set.
+//
+//  %1 = bitcast i8* %addr to i32*
+//  %2 = extractelement <16 x i1> %mask, i32 0
+//  %3 = icmp eq i1 %2, true
+//  br i1 %3, label %cond.store, label %else
+//
+// cond.store:                                       ; preds = %0
+//  %4 = extractelement <16 x i32> %val, i32 0
+//  %5 = getelementptr i32* %1, i32 0
+//  store i32 %4, i32* %5
+//  br label %else
+//
+// else:                                             ; preds = %0, %cond.store
+//  %6 = extractelement <16 x i1> %mask, i32 1
+//  %7 = icmp eq i1 %6, true
+//  br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1:                                      ; preds = %else
+//  %8 = extractelement <16 x i32> %val, i32 1
+//  %9 = getelementptr i32* %1, i32 1
+//  store i32 %8, i32* %9
+//  br label %else2
+//   . . .
+static void scalarizeMaskedStore(CallInst *CI) {
+  Value *Src = CI->getArgOperand(0);
+  Value *Ptr = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
+  Value *Mask = CI->getArgOperand(3);
+
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+  VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+  assert(VecType && "Unexpected data type in masked store intrinsic");
+
+  Type *EltTy = VecType->getElementType();
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  // Short-cut if the mask is all-true.
+  bool IsAllOnesMask =
+      isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+  if (IsAllOnesMask) {
+    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+    CI->eraseFromParent();
+    return;
+  }
+
+  // Adjust alignment for the scalar instruction.
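+  // The scalar stores can only rely on the vector access's alignment up to
+  // the element size; e.g. a <16 x i32> store with align 64 still becomes
+  // i32 stores with align at most 4 (the values here are illustrative).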
+  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+  // Bitcast %addr from i8* to EltTy*
+  Type *NewPtrType =
+      EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+  Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+  unsigned VectorWidth = VecType->getNumElements();
+
+  if (isa<ConstantVector>(Mask)) {
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+        continue;
+      Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+      Value *Gep =
+          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+      Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+    }
+    CI->eraseFromParent();
+    return;
+  }
+
+  for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+    // Fill the "else" block, created in the previous iteration
+    //
+    //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+    //  %to_store = icmp eq i1 %mask_1, true
+    //  br i1 %to_store, label %cond.store, label %else
+    //
+    Value *Predicate =
+        Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+                                    ConstantInt::get(Predicate->getType(), 1));
+
+    // Create "cond" block
+    //
+    //  %OneElt = extractelement <16 x i32> %Src, i32 Idx
+    //  %EltAddr = getelementptr i32* %1, i32 0
+    //  store i32 %OneElt, i32* %EltAddr
+    //
+    BasicBlock *CondBlock =
+        IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+    Builder.SetInsertPoint(InsertPt);
+
+    Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+    Value *Gep =
+        Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+    Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+    // Create "else" block, fill it in the next iteration
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+    Builder.SetInsertPoint(InsertPt);
+    Instruction *OldBr = IfBlock->getTerminator();
+    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    OldBr->eraseFromParent();
+    IfBlock = NewIfBlock;
+  }
+  CI->eraseFromParent();
+}
+
+// Translate a masked gather intrinsic like
+//   <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %Ptrs, i32 4,
+//                                         <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, with the elements loaded one by one if
+// the appropriate mask bit is set.
+//
+//  %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
+//  %ToLoad0 = icmp eq i1 %Mask0, true
+//  br i1 %ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+//  %Load0 = load i32, i32* %Ptr0, align 4
+//  %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
+//  br label %else
+//
+// else:
+//  %res.phi.else = phi <16 x i32> [ %Res0, %cond.load ], [ undef, %0 ]
+//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
+//  %ToLoad1 = icmp eq i1 %Mask1, true
+//  br i1 %ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+//  %Load1 = load i32, i32* %Ptr1, align 4
+//  %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
+//  br label %else2
+//   . . .
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void scalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement( + VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + 
CI->eraseFromParent();
+}
+
+// Translate a masked scatter intrinsic, like
+//   void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
+//                                    <16 x i1> %Mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set.
+//
+//  %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+//  %Mask0 = extractelement <16 x i1> %Mask, i32 0
+//  %ToStore0 = icmp eq i1 %Mask0, true
+//  br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+//  %Elt0 = extractelement <16 x i32> %Src, i32 0
+//  %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+//  store i32 %Elt0, i32* %Ptr0, align 4
+//  br label %else
+//
+// else:
+//  %Mask1 = extractelement <16 x i1> %Mask, i32 1
+//  %ToStore1 = icmp eq i1 %Mask1, true
+//  br i1 %ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+//  %Elt1 = extractelement <16 x i32> %Src, i32 1
+//  %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+//  store i32 %Elt1, i32* %Ptr1, align 4
+//  br label %else2
+//   . . .
+static void scalarizeMaskedScatter(CallInst *CI) {
+  Value *Src = CI->getArgOperand(0);
+  Value *Ptrs = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
+  Value *Mask = CI->getArgOperand(3);
+
+  assert(isa<VectorType>(Src->getType()) &&
+         "Unexpected data type in masked scatter intrinsic");
+  assert(isa<VectorType>(Ptrs->getType()) &&
+         isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+         "Vector of pointers is expected in masked scatter intrinsic");
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+  unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+  // Shorten the way if the mask is a vector of constants.
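+  // For example, with a constant mask of <i1 1, i1 0, i1 1, i1 0> (values
+  // illustrative) only lanes 0 and 2 get a scalar store, and no branches or
+  // basic-block splits are emitted at all.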
+ bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + bool EverMadeChange = false; + + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end();) { + BasicBlock *BB = &*I++; + bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + + EverMadeChange |= MadeChange; + } + + return EverMadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { + bool MadeChange = false; + + BasicBlock::iterator CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++)) + MadeChange |= optimizeCallInst(CI, ModifiedDT); + if (ModifiedDT) + return true; + } + + return MadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, + bool &ModifiedDT) { + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType())) { + scalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { + scalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } + case 
Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + scalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + scalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } + } + } + + return false; +} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c77046fdfaf5..caf5cb497a71 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -114,7 +114,7 @@ namespace { SmallPtrSet<SDNode *, 32> CombinedNodes; // AA - Used for DAG load/store alias analysis. - AliasAnalysis &AA; + AliasAnalysis *AA; /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. @@ -496,9 +496,9 @@ namespace { SDValue distributeTruncateThroughAnd(SDNode *N); public: - DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); MaximumLegalStoreInBits = 0; @@ -1729,10 +1729,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { NumLeftToConsider--; } - SDValue Result; - // If we've changed things around then replace token factor. if (Changed) { + SDValue Result; if (Ops.empty()) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); @@ -1749,13 +1748,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } } - - // Add users to worklist, since we may introduce a lot of new - // chained token factors while removing memory deps. - return CombineTo(N, Result, true /*add to worklist*/); + return Result; } - - return Result; + return SDValue(); } /// MERGE_VALUES can always be eliminated. 
@@ -2131,17 +2126,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue CarryIn = N->getOperand(2); + SDLoc DL(N); // canonicalize constant to RHS ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); if (N0C && !N1C) - return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), - N1, N0, CarryIn); + return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); // fold (addcarry x, y, false) -> (uaddo x, y) if (isNullConstant(CarryIn)) - return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1); + return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; @@ -5313,17 +5308,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - // If the target supports masking y in (shl, y), - // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y) - if (TLI.isOperationLegal(ISD::SHL, VT) && - TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) { - if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { - if (Mask->getZExtValue() == OpSizeInBits - 1) { - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0)); - } - } - } - ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 @@ -5331,7 +5315,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (isNullConstant(N0)) + if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) @@ -5522,18 +5506,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); - // If the target supports masking y in (sra, y), - // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y) - if (TLI.isOperationLegal(ISD::SRA, VT) && - TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) { - if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { - if (Mask->getZExtValue() == OpSizeInBits - 1) { - return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0)); - } - } - } - // Arithmetic shifting an all-sign-bit value is a no-op. 
+ // fold (sra 0, x) -> 0 + // fold (sra -1, x) -> -1 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; @@ -5548,12 +5523,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); - // fold (sra 0, x) -> 0 - if (isNullConstant(N0)) - return N0; - // fold (sra -1, x) -> -1 - if (isAllOnesConstant(N0)) - return N0; // fold (sra x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); @@ -5691,17 +5660,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); - // If the target supports masking y in (srl, y), - // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y) - if (TLI.isOperationLegal(ISD::SRL, VT) && - TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) { - if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) { - if (Mask->getZExtValue() == OpSizeInBits - 1) { - return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0)); - } - } - } - // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -5714,7 +5672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (isNullConstant(N0)) + if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) @@ -7365,14 +7323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueSizeInBits(), std::min(Op.getValueSizeInBits(), VT.getSizeInBits())); - if (TruncatedBits.isSubsetOf(Known.Zero)) { - if (VT.bitsGT(Op.getValueType())) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); - if (VT.bitsLT(Op.getValueType())) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - - return Op; - } + if (TruncatedBits.isSubsetOf(Known.Zero)) + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } // fold (zext (truncate (load x))) -> (zext (smaller load x)) @@ -7419,14 +7371,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { - SDValue Op = N0.getOperand(0); - if (SrcVT.bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (SrcVT.bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } + SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); + AddToWorklist(Op.getNode()); return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } } @@ -7440,11 +7386,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType()) || !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); - if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); - } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); - } + X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); @@ -7669,14 +7611,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (truncate x)) - if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue TruncOp = N0.getOperand(0); - if (TruncOp.getValueType() == VT) - return TruncOp; // x iff x size == zext size. 
-    if (TruncOp.getValueType().bitsGT(VT))
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
-    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
-  }
+  if (N0.getOpcode() == ISD::TRUNCATE)
+    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
   // if the trunc is not free.
@@ -7687,11 +7623,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                            N0.getValueType())) {
     SDLoc DL(N);
     SDValue X = N0.getOperand(0).getOperand(0);
-    if (X.getValueType().bitsLT(VT)) {
-      X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
-    } else if (X.getValueType().bitsGT(VT)) {
-      X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
-    }
+    X = DAG.getAnyExtOrTrunc(X, DL, VT);
     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
     Mask = Mask.zext(VT.getSizeInBits());
     return DAG.getNode(ISD::AND, DL, VT,
@@ -14868,6 +14800,55 @@ SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
   return SDValue();
 }

+// Combine shuffles of splat-shuffles of the form:
+// shuffle (shuffle V, undef, splat-mask), undef, M
+// If splat-mask contains undef elements, we need to be careful about
+// introducing undef's in the folded mask which are not the result of composing
+// the masks of the shuffles.
+static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
+                                     ShuffleVectorSDNode *Splat,
+                                     SelectionDAG &DAG) {
+  ArrayRef<int> SplatMask = Splat->getMask();
+  assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+
+  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
+  // every undef mask element in the splat-shuffle has a corresponding undef
+  // element in the user-shuffle's mask or if the composition of mask elements
+  // would result in undef.
+  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
+  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
+  //   In this case it is not legal to simplify to the splat-shuffle because we
+  //   may be exposing to the users of the shuffle an undef element at index 1
+  //   which was not there before the combine.
+  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
+  //   In this case the composition of masks yields SplatMask, so it's ok to
+  //   simplify to the splat-shuffle.
+  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
+  //   In this case the composed mask includes all undef elements of SplatMask
+  //   and in addition sets element zero to undef. It is safe to simplify to
+  //   the splat-shuffle.
+  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
+                                       ArrayRef<int> SplatMask) {
+    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
+      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
+          SplatMask[UserMask[i]] != -1)
+        return false;
+    return true;
+  };
+  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
+    return SDValue(Splat, 0);
+
+  // Create a new shuffle with a mask that is composed of the two shuffles'
+  // masks.
+  SmallVector<int, 32> NewMask;
+  for (int Idx : UserMask)
+    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
+
+  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
+                              Splat->getOperand(0), Splat->getOperand(1),
+                              NewMask);
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -14914,6 +14895,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }

+  // A shuffle of a single vector that is a splat can always be folded.
+ if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0)) + if (N1->isUndef() && N0Shuf->isSplat()) + return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG); + // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { @@ -16381,17 +16367,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { UseAA = false; #endif - if (UseAA && + if (UseAA && AA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; AliasResult AAResult = - AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op1->getAAInfo() : AAMDNodes()) ); if (AAResult == NoAlias) return false; } @@ -16605,7 +16591,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { } /// This is the entry point for the file. -void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel) { /// This is the main entry point to this class. DAGCombiner(*this, AA, OptLevel).Run(Level); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 8c98e3740f6d..5003b79974eb 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -622,7 +622,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0...) + // CALLSEQ_START(0, 0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -1150,16 +1150,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; } - unsigned Offset = 0; + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return true; + Optional<MachineOperand> Op; - if (const auto *Arg = dyn_cast<Argument>(Address)) - // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Op = MachineOperand::CreateFI(Offset); - if (!Op) - if (unsigned Reg = lookUpRegForValue(Address)) - Op = MachineOperand::CreateReg(Reg, false); + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. 
E.g.,
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cdf4d3a8b4e5..606b8952f3c1 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
   MF = &mf;
   TLI = MF->getSubtarget().getTargetLowering();
   RegInfo = &MF->getRegInfo();
-  MachineModuleInfo &MMI = MF->getMMI();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   unsigned StackAlign = TFI->getStackAlignment();
@@ -214,33 +213,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
       if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
         InitializeRegForValue(&I);

-      // Collect llvm.dbg.declare information. This is done now instead of
-      // during the initial isel pass through the IR so that it is done
-      // in a predictable order.
-      if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
-        assert(DI->getVariable() && "Missing variable");
-        assert(DI->getDebugLoc() && "Missing location");
-        if (MMI.hasDebugInfo()) {
-          // Don't handle byval struct arguments or VLAs, for example.
-          // Non-byval arguments are handled here (they refer to the stack
-          // temporary alloca at this point).
-          const Value *Address = DI->getAddress();
-          if (Address) {
-            if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
-              Address = BCI->getOperand(0);
-            if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
-              DenseMap<const AllocaInst *, int>::iterator SI =
-                StaticAllocaMap.find(AI);
-              if (SI != StaticAllocaMap.end()) { // Check for VLAs.
-                int FI = SI->second;
-                MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
-                                       FI, DI->getDebugLoc());
-              }
-            }
-          }
-        }
-      }
-
       // Decide the preferred extend type for a value.
       PreferredExtendType[&I] = getPreferredExtendForValue(&I);
     }
@@ -510,12 +482,11 @@ void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
 /// If the argument does not have any assigned frame index then INT_MAX is
 /// returned.
 int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
-  DenseMap<const Argument *, int>::iterator I =
-    ByValArgFrameIndexMap.find(A);
+  auto I = ByValArgFrameIndexMap.find(A);
   if (I != ByValArgFrameIndexMap.end())
     return I->second;
   DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
-  return 0;
+  return INT_MAX;
 }

 unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2654b3ad7a62..9a47a914df91 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1493,7 +1493,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
   // Chain the dynamic stack allocation so that it doesn't modify the stack
   // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); @@ -4187,6 +4187,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplacedNode(Node); break; } + case ISD::MUL: case ISD::SDIV: case ISD::SREM: case ISD::UDIV: diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index cde4331cc42d..4c3b514856b7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -675,6 +675,7 @@ private: // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_TruncateHelper(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 97a7fab6efd0..ff0e609803d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1513,6 +1513,22 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: Res = SplitVecOp_ExtVecInRegOp(N); break; + + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = SplitVecOp_VECREDUCE(N, OpNo); + break; } } @@ -1565,6 +1581,48 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); } +SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + + SDValue VecOp = N->getOperand(OpNo); + EVT VecVT = VecOp.getValueType(); + assert(VecVT.isVector() && "Can only split reduce vector operand"); + GetSplitVector(VecOp, Lo, Hi); + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); + + bool NoNaN = N->getFlags().hasNoNaNs(); + unsigned CombineOpc = 0; + switch (N->getOpcode()) { + case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break; + case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break; + case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break; + case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break; + case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break; + case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break; + case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break; + case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break; + case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break; + case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; + case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; + case ISD::VECREDUCE_FMAX: + CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN; + break; + case ISD::VECREDUCE_FMIN: + CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN; + break; + default: + llvm_unreachable("Unexpected reduce ISD node"); + } + + // Use the appropriate scalar instruction on the split subvectors before + // reducing the now partially reduced smaller vector. 
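+  // Worked example: for a VECREDUCE_ADD whose <8 x i32> operand was split,
+  // Lo/Hi are the two <4 x i32> halves, Partial is add(Lo, Hi), and the
+  // node becomes vecreduce_add(<4 x i32> Partial).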
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi); + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d605a1dc1c20..057badcd6b74 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2217,10 +2217,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - unsigned TrailZ = Known.Zero.countTrailingOnes() + - Known2.Zero.countTrailingOnes(); - unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() + - Known2.Zero.countLeadingOnes(), + unsigned TrailZ = Known.countMinTrailingZeros() + + Known2.countMinTrailingZeros(); + unsigned LeadZ = std::max(Known.countMinLeadingZeros() + + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; Known.resetAll(); @@ -2233,13 +2233,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); - unsigned LeadZ = Known2.Zero.countLeadingOnes(); + unsigned LeadZ = Known2.countMinLeadingZeros(); computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros(); - if (RHSUnknownLeadingOnes != BitWidth) - LeadZ = std::min(BitWidth, - LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); + if (RHSMaxLeadingZeros != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); Known.Zero.setHighBits(LeadZ); break; @@ -2359,7 +2358,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::CTTZ_ZERO_UNDEF: { computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. - unsigned PossibleTZ = Known2.One.countTrailingZeros(); + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); unsigned LowBits = Log2_32(PossibleTZ) + 1; Known.Zero.setBitsFrom(LowBits); break; @@ -2368,7 +2367,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::CTLZ_ZERO_UNDEF: { computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. - unsigned PossibleLZ = Known2.One.countLeadingZeros(); + unsigned PossibleLZ = Known2.countMaxLeadingZeros(); unsigned LowBits = Log2_32(PossibleLZ) + 1; Known.Zero.setBitsFrom(LowBits); break; @@ -2376,7 +2375,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::CTPOP: { computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If we know some of the bits are zero, they can't be one. - unsigned PossibleOnes = BitWidth - Known2.Zero.countPopulation(); + unsigned PossibleOnes = Known2.countMaxPopulation(); Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); break; } @@ -2493,13 +2492,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // going to be 0 in the result. Both addition and complement operations // preserve the low zero bits. 
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); - unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); if (KnownZeroLow == 0) break; computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - KnownZeroLow = std::min(KnownZeroLow, - Known2.Zero.countTrailingOnes()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); Known.Zero.setLowBits(KnownZeroLow); break; } @@ -2526,15 +2524,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); - unsigned KnownZeroHigh = Known2.Zero.countLeadingOnes(); - unsigned KnownZeroLow = Known2.Zero.countTrailingOnes(); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - KnownZeroHigh = std::min(KnownZeroHigh, - Known2.Zero.countLeadingOnes()); - KnownZeroLow = std::min(KnownZeroLow, - Known2.Zero.countTrailingOnes()); + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { // With ADDE and ADDCARRY, a carry bit may be added in, so we can only @@ -2594,8 +2590,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(), - Known2.Zero.countLeadingOnes()); + uint32_t Leaders = + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); Known.resetAll(); Known.Zero.setHighBits(Leaders); break; @@ -2711,8 +2707,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // UMIN - we know that the result will have the maximum of the // known zero leading bits of the inputs. - unsigned LeadZero = Known.Zero.countLeadingOnes(); - LeadZero = std::max(LeadZero, Known2.Zero.countLeadingOnes()); + unsigned LeadZero = Known.countMinLeadingZeros(); + LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); Known.Zero &= Known2.Zero; Known.One &= Known2.One; @@ -2726,8 +2722,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, // UMAX - we know that the result will have the maximum of the // known one leading bits of the inputs. - unsigned LeadOne = Known.One.countLeadingOnes(); - LeadOne = std::max(LeadOne, Known2.One.countLeadingOnes()); + unsigned LeadOne = Known.countMinLeadingOnes(); + LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); Known.Zero &= Known2.Zero; Known.One &= Known2.One; @@ -2843,8 +2839,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Fall back to computeKnownBits to catch other known cases. 
KnownBits Known; computeKnownBits(Val, Known); - return (Known.Zero.countPopulation() == BitWidth - 1) && - (Known.One.countPopulation() == 1); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); } unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { @@ -2860,6 +2855,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); + unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2903,6 +2899,39 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } return Tmp; + case ISD::VECTOR_SHUFFLE: { + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + int M = SVN->getMaskElt(i); + if (!DemandedElts[i]) + continue; + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + if (M < 0) + return 1; + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + Tmp = UINT_MAX; + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits fall-back. + if (Tmp == 1) + break; + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::SIGN_EXTEND: case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); @@ -3142,14 +3171,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } - case ISD::EXTRACT_SUBVECTOR: - return ComputeNumSignBits(Op.getOperand(0), Depth + 1); + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + } + return ComputeNumSignBits(Src, Depth + 1); + } case ISD::CONCAT_VECTORS: - // Determine the minimum number of sign bits across all input vectors. - // Early out if the result is already 1. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); - for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) - Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1)); + // Determine the minimum number of sign bits across all demanded + // elts of the input vectors. Early out if the result is already 1. 
+  Tmp = UINT_MAX;
+  EVT SubVectorVT = Op.getOperand(0).getValueType();
+  unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+  unsigned NumSubVectors = Op.getNumOperands();
+  for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+    APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+    DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+    if (!DemandedSub)
+      continue;
+    Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+    Tmp = std::min(Tmp, Tmp2);
+  }
+  assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
   return Tmp;
 }
@@ -3543,7 +3594,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(Operand.getValueType().bitsLT(VT) &&
            "Invalid sext node, dst < src!");
     if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
-      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
     else if (OpOpcode == ISD::UNDEF)
       // sext(undef) = 0, because the top bits will all be the same.
       return getConstant(0, DL, VT);
@@ -3559,8 +3610,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(Operand.getValueType().bitsLT(VT) &&
            "Invalid zext node, dst < src!");
     if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
-      return getNode(ISD::ZERO_EXTEND, DL, VT,
-                     Operand.getNode()->getOperand(0));
+      return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
     else if (OpOpcode == ISD::UNDEF)
       // zext(undef) = 0, because the top bits will be zero.
       return getConstant(0, DL, VT);
@@ -3579,13 +3629,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
         OpOpcode == ISD::ANY_EXTEND)
       // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
-      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
     else if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);

     // (ext (trunc x)) -> x
     if (OpOpcode == ISD::TRUNCATE) {
-      SDValue OpOp = Operand.getNode()->getOperand(0);
+      SDValue OpOp = Operand.getOperand(0);
       if (OpOp.getValueType() == VT)
         return OpOp;
     }
@@ -3601,16 +3651,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     assert(Operand.getValueType().bitsGT(VT) &&
            "Invalid truncate node, src < dst!");
     if (OpOpcode == ISD::TRUNCATE)
-      return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+      return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
     if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
         OpOpcode == ISD::ANY_EXTEND) {
       // If the source is smaller than the dest, we still need an extend.
- if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + if (Operand.getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) - return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) - return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - return Operand.getNode()->getOperand(0); + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); + if (Operand.getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); + return Operand.getOperand(0); } if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); @@ -3665,15 +3715,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? - return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0), - Operand.getNode()->getFlags()); + return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), + Operand.getOperand(0), Operand.getNode()->getFlags()); if (OpOpcode == ISD::FNEG) // --X -> X - return Operand.getNode()->getOperand(0); + return Operand.getOperand(0); break; case ISD::FABS: if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) - return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); break; } @@ -5970,7 +6019,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); - case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 1: return getNode(Opcode, DL, VT, Ops[0], Flags); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; @@ -7520,9 +7569,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); KnownBits Known(PtrWidth); - llvm::computeKnownBits(const_cast<GlobalValue *>(GV), Known, - getDataLayout()); - unsigned AlignBits = Known.Zero.countTrailingOnes(); + llvm::computeKnownBits(GV, Known, getDataLayout()); + unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) return MinAlign(Align, GVOffset); @@ -7621,7 +7669,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, return false; // FIXME: The widths are based on this node's type, but build vectors can - // truncate their operands. + // truncate their operands. SplatValue = APInt(VecWidth, 0); SplatUndef = APInt(VecWidth, 0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 50313e2da884..57d340c41c39 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -661,7 +661,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->Known.Zero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); if (NumZeroBits == RegSize) { // The current value is a zero. 
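The KnownBits counting helpers adopted throughout this import (countMinTrailingZeros, countMinLeadingZeros, countMaxLeadingZeros, and friends) are thin wrappers over the older APInt idioms they replace. A minimal sketch of the correspondence, assuming the llvm/Support/KnownBits.h interface of this revision, where a set bit in Known.Zero (resp. Known.One) means that bit of the value is known to be zero (resp. one):

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// A run of trailing ones in Known.Zero is a run of bits known to be zero
// at the bottom of the value, hence a guaranteed minimum trailing-zero count.
static unsigned minTrailingZerosOld(const KnownBits &Known) {
  return Known.Zero.countTrailingOnes();   // pre-rename spelling
}
static unsigned minTrailingZerosNew(const KnownBits &Known) {
  return Known.countMinTrailingZeros();    // helper used in the hunks above
}
// Likewise countMaxLeadingZeros() == One.countLeadingZeros(): a bit not
// known to be one may still be zero, which bounds the possible leading zeros.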
@@ -811,9 +811,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { - AA = &aa; + AA = aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); @@ -3423,7 +3423,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(MemoryLocation( + else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); @@ -3535,8 +3535,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - assert(!AA->pointsToConstantMemory(MemoryLocation( - SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && + assert((!AA || !AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) && "load_from_swift_error should not be constant memory"); SmallVector<EVT, 4> ValueVTs; @@ -3817,7 +3817,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( + bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation( PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); @@ -3861,7 +3861,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory(MemoryLocation( + AA && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. @@ -4676,7 +4676,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. - if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4927,6 +4928,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return nullptr; + SDValue &N = NodeMap[Address]; if (!N.getNode() && isa<Argument>(Address)) // Check unused arguments map. @@ -4957,20 +4965,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N)) { - // If variable is pinned by a alloca in dominating bb then - // use StaticAllocaMap. 
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { - if (AI->getParent() != DI.getParent()) { - DenseMap<const AllocaInst*, int>::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - } - } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -5651,7 +5645,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(MF.getName()), Idx); + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -5672,7 +5666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. @@ -5737,6 +5731,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; + + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: { + visitVectorReduce(I, Intrinsic); + return nullptr; + } + } } @@ -5982,7 +5994,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. - if (Builder.AA->pointsToConstantMemory(PtrVal)) { + if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { @@ -7422,11 +7434,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // - // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = CALLSEQ_START(chain, 0, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // - Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); // Add the <id> and <numBytes> constants. 
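Every getRealLinkageName call site in this import is renamed to GlobalValue::dropLLVMManglingEscape with unchanged behavior: the '\1' escape byte at the front of a name tells the mangler to emit the rest verbatim, and the helper just strips it. A minimal sketch of what it does:

#include "llvm/ADT/StringRef.h"

// Mirrors GlobalValue::dropLLVMManglingEscape: drop the leading '\1'
// escape, if present, to recover the user-visible symbol name.
static llvm::StringRef dropManglingEscape(llvm::StringRef Name) {
  if (!Name.empty() && Name[0] == '\1')
    return Name.substr(1);
  return Name;
}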
@@ -7616,6 +7628,76 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, FuncInfo.MF->getFrameInfo().setHasPatchPoint(); } +void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2; + if (I.getNumArgOperands() > 1) + Op2 = getValue(I.getArgOperand(1)); + SDLoc dl = getCurSDLoc(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Res; + FastMathFlags FMF; + if (isa<FPMathOperator>(I)) + FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + SDFlags.setNoNaNs(FMF.noNaNs()); + + switch (Intrinsic) { + case Intrinsic::experimental_vector_reduce_fadd: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_fmul: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_add: + Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_mul: + Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_and: + Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_or: + Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_xor: + Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smax: + Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smin: + Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umax: + Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umin: + Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmax: { + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + break; + } + case Intrinsic::experimental_vector_reduce_fmin: { + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + break; + } + default: + llvm_unreachable("Unhandled vector reduce intrinsic"); + } + setValue(&I, Res); +} + /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. 
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e9989058ae5..bdaee858da61 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -604,11 +604,11 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), + DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo), HasTailCall(false) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + void init(GCFunctionInfo *gfi, AliasAnalysis *AA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -909,6 +909,8 @@ private: void visitGCRelocate(const GCRelocateInst &I); void visitGCResult(const GCResultInst &I); + void visitVectorReduce(const CallInst &I, unsigned Intrinsic); + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 26dd45ef933f..c37d7080f2c5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -346,6 +346,19 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE: return "setfalse"; case ISD::SETFALSE2: return "setfalse2"; } + case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; + case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; + case ISD::VECREDUCE_ADD: return "vecreduce_add"; + case ISD::VECREDUCE_MUL: return "vecreduce_mul"; + case ISD::VECREDUCE_AND: return "vecreduce_and"; + case ISD::VECREDUCE_OR: return "vecreduce_or"; + case ISD::VECREDUCE_XOR: return "vecreduce_xor"; + case ISD::VECREDUCE_SMAX: return "vecreduce_smax"; + case ISD::VECREDUCE_SMIN: return "vecreduce_smin"; + case ISD::VECREDUCE_UMAX: return "vecreduce_umax"; + case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; + case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; + case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3aabdaeaa094..5e0feccb6b4c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -299,7 +300,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), - GFI(), + AA(), GFI(), OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); @@ -317,7 +318,8 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AAResultsWrapperPass>(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<StackProtector>(); AU.addPreserved<StackProtector>(); @@ -394,7 +396,6 @@ bool 
SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   TII = MF->getSubtarget().getInstrInfo();
   TLI = MF->getSubtarget().getTargetLowering();
   RegInfo = &MF->getRegInfo();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
   ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
@@ -406,12 +407,22 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   CurDAG->init(*MF, *ORE);
   FuncInfo->set(Fn, *MF, CurDAG);

+  // Now get the optional analyses if we want to.
+  // This is based on the possibly changed OptLevel (after optnone is taken
+  // into account). That's unfortunate but OK because it just means we won't
+  // ask for passes that have been required anyway.
+
   if (UseMBPI && OptLevel != CodeGenOpt::None)
     FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
   else
     FuncInfo->BPI = nullptr;

-  SDB->init(GFI, *AA, LibInfo);
+  if (OptLevel != CodeGenOpt::None)
+    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  else
+    AA = nullptr;
+
+  SDB->init(GFI, AA, LibInfo);

   MF->setHasInlineAsm(false);
@@ -715,7 +726,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   {
     NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
                        GroupDescription, TimePassesIsEnabled);
-    CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+    CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
   }

   DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -747,7 +758,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     {
       NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
                          GroupName, GroupDescription, TimePassesIsEnabled);
-      CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+      CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
     }

     DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -781,7 +792,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     {
       NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
                          GroupName, GroupDescription, TimePassesIsEnabled);
-      CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+      CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
     }

     DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -807,7 +818,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   {
     NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
                        GroupDescription, TimePassesIsEnabled);
-    CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+    CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
   }

   DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -1145,6 +1156,51 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
   }
 }

+/// Collect llvm.dbg.declare information. This is done after argument lowering
+/// in case the declarations refer to arguments.
+static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
+  MachineFunction *MF = FuncInfo->MF;
+  const DataLayout &DL = MF->getDataLayout();
+  for (const BasicBlock &BB : *FuncInfo->Fn) {
+    for (const Instruction &I : BB) {
+      const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
+      if (!DI)
+        continue;
+
+      assert(DI->getVariable() && "Missing variable");
+      assert(DI->getDebugLoc() && "Missing location");
+      const Value *Address = DI->getAddress();
+      if (!Address)
+        continue;
+
+      // Look through casts and constant offset GEPs. These mostly come from
+      // inalloca.
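+      // e.g. an address of the form
+      //   bitcast (getelementptr inbounds (%frame* %arg, i32 0, i32 1))
+      // is peeled back to %arg here, with the constant byte offset
+      // accumulated into Offset and later prepended to the DIExpression.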
+ APInt Offset(DL.getPointerSizeInBits(0), 0); + Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + // Check if the variable is a static alloca or a byval or inalloca + // argument passed in memory. If it is not, then we will ignore this + // intrinsic and handle this during isel like dbg.value. + int FI = INT_MAX; + if (const auto *AI = dyn_cast<AllocaInst>(Address)) { + auto SI = FuncInfo->StaticAllocaMap.find(AI); + if (SI != FuncInfo->StaticAllocaMap.end()) + FI = SI->second; + } else if (const auto *Arg = dyn_cast<Argument>(Address)) + FI = FuncInfo->getArgumentFrameIndex(Arg); + + if (FI == INT_MAX) + continue; + + DIExpression *Expr = DI->getExpression(); + if (Offset.getBoolValue()) + Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, + Offset.getZExtValue()); + MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); + } + } +} + /// Propagate swifterror values through the machine function CFG. static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { auto *TLI = FuncInfo->TLI; @@ -1317,6 +1373,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB); + processDbgDeclares(FuncInfo); + // Iterate over all basic blocks in the function. for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOpt::None) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 23f597db140c..befbd80d7965 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -417,11 +417,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. - SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, - DAG.getNode(ISD::TRUNCATE, dl, SmallVT, - Op.getNode()->getOperand(0)), - DAG.getNode(ISD::TRUNCATE, dl, SmallVT, - Op.getNode()->getOperand(1))); + SDValue X = DAG.getNode( + Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1))); bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); @@ -817,7 +816,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits // are not demanded. This will likely allow the anyext to be folded away. if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { - SDValue InnerOp = InOp.getNode()->getOperand(0); + SDValue InnerOp = InOp.getOperand(0); EVT InnerVT = InnerOp.getValueType(); unsigned InnerBits = InnerVT.getSizeInBits(); if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 4837495777da..2638702da152 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -282,8 +282,14 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, if (!Restore) Restore = &MBB; - else + else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it + // means the block never returns. If that's the + // case, we don't want to call + // `findNearestCommonDominator`, which will + // return `Restore`. 
      Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+    else
+      Restore = nullptr; // Abort, we can't find a restore point in this case.

     // Make sure we would be able to insert the restore code before the
     // terminator.
@@ -293,7 +299,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
         continue;
       // One of the terminators needs to happen before the restore point.
       if (MBB.succ_empty()) {
-        Restore = nullptr;
+        Restore = nullptr; // Abort, we can't find a restore point in this case.
         break;
       }
       // Look for a restore point that post-dominates all the successors.
@@ -419,7 +425,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF,
 }

 bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
-  if (MF.empty() || !isShrinkWrapEnabled(MF))
+  if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
     return false;

   DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index ab578df4069d..e9eff4d0acb2 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -93,8 +93,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
                                         doubleUnderDataTy, // __data
                                         VoidPtrTy,         // __personality
                                         VoidPtrTy,         // __lsda
-                                        doubleUnderJBufTy, // __jbuf
-                                        nullptr);
+                                        doubleUnderJBufTy  // __jbuf
+                                        );

   return true;
 }
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 34892680aceb..1d232c71d824 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -232,7 +232,11 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
   if (!MD)
     return nullptr;

-  auto *VM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+  const MDOperand &Op = MD->getOperand(0);
+  if (!Op.get())
+    return nullptr;
+
+  auto *VM = dyn_cast<ValueAsMetadata>(Op);
   if (!VM)
     report_fatal_error("MD_associated operand is not ValueAsMetadata");
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 150195f5f85b..e6c5d8753b83 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -487,6 +487,14 @@ void TargetPassConfig::addIRPasses() {

   // Insert calls to mcount-like functions.
   addPass(createCountingFunctionInserterPass());
+
+  // Add a pass to scalarize masked memory intrinsics that the target does
+  // not support. Each unsupported intrinsic is replaced with a chain of
+  // basic blocks that stores/loads elements one-by-one if the appropriate
+  // mask bit is set.
+  addPass(createScalarizeMaskedMemIntrinPass());
+
+  // Expand reduction intrinsics into shuffle sequences if the target wants to.
+  addPass(createExpandReductionsPass());
 }

 /// Turn exception handling constructs into something the code generators can
@@ -607,6 +615,9 @@ void TargetPassConfig::addMachinePasses() {
     addPass(&LocalStackSlotAllocationID, false);
   }

+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&LiveRangeShrinkID);
+
   // Run pre-ra passes.
   addPreRegAlloc();
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 75359fe3c0ea..7392c8327148 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -155,7 +155,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
-    AU.addRequired<AAResultsWrapperPass>();
+    AU.addUsedIfAvailable<AAResultsWrapperPass>();
     AU.addUsedIfAvailable<LiveVariables>();
     AU.addPreserved<LiveVariables>();
     AU.addPreserved<SlotIndexes>();
@@ -1627,7 +1627,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
   InstrItins = MF->getSubtarget().getInstrItineraryData();
   LV = getAnalysisIfAvailable<LiveVariables>();
   LIS = getAnalysisIfAvailable<LiveIntervals>();
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>())
+    AA = &AAPass->getAAResults();
+  else
+    AA = nullptr;
   OptLevel = TM.getOptLevel();

   bool MadeChange = false;
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index f085132b6a94..407fd9b162e9 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -206,11 +206,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
         if (InputReg != OutputReg) {
           MachineRegisterInfo &MRI = F.getRegInfo();
           unsigned InputSub = Input.getSubReg();
-          if (InputSub == 0) {
-            MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg));
+          if (InputSub == 0 &&
+              MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
             MRI.replaceRegWith(OutputReg, InputReg);
           } else {
-            // The input register to the PHI has a subregister:
+            // The input register to the PHI has a subregister or it can't be
+            // constrained to the proper register class:
             // insert a COPY instead of simply replacing the output
             // with the input.
             const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
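As a standalone illustration of the combineShuffleOfSplat addition earlier in this diff, the mask composition it performs can be modeled outside SelectionDAG; a minimal sketch, with std::vector standing in for ArrayRef and -1 marking an undef lane:

#include <vector>

// Compose a user shuffle mask with the splat shuffle's mask, exactly as
// the NewMask loop in combineShuffleOfSplat does: undef user lanes stay
// undef, and defined lanes index into the splat mask.
static std::vector<int> composeMasks(const std::vector<int> &UserMask,
                                     const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return NewMask;
}

// composeMasks({0, 2, -1, -1}, {2, -1, 2, -1}) yields {2, 2, -1, -1},
// matching the first example in the function's comment block.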