author     Dimitry Andric <dim@FreeBSD.org>  2017-05-16 19:46:52 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-05-16 19:46:52 +0000
commit     6b3f41ed88e8e440e11a4fbf20b6600529f80049 (patch)
tree       928b056f24a634d628c80238dbbf10d41b1a71d5 /lib/CodeGen
parent     c46e6a5940c50058e00c0c5f9123fd82e338d29a (diff)
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp            |  14
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp         |   3
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp         |  42
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h           |  22
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp               |  89
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h                 |  20
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h                  |   4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp                |   6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h                  |   5
-rw-r--r--  lib/CodeGen/AsmPrinter/WinException.cpp             |  12
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp                    |  31
-rw-r--r--  lib/CodeGen/CMakeLists.txt                          |   3
-rw-r--r--  lib/CodeGen/CodeGen.cpp                             |   4
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp                      | 548
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp                 |   5
-rw-r--r--  lib/CodeGen/ExpandReductions.cpp                    | 167
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp            |  10
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp            |   9
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp                    |   8
-rw-r--r--  lib/CodeGen/IfConversion.cpp                        |  30
-rw-r--r--  lib/CodeGen/LiveRangeShrink.cpp                     | 211
-rw-r--r--  lib/CodeGen/LiveVariables.cpp                       |   2
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp               |  30
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp                     |   4
-rw-r--r--  lib/CodeGen/PHIElimination.cpp                      |   2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp                   |   2
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp                  |   7
-rw-r--r--  lib/CodeGen/SafeStack.cpp                           | 172
-rw-r--r--  lib/CodeGen/ScalarizeMaskedMemIntrin.cpp            | 660
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp            | 176
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp               |  20
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp   |  33
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp            |   3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h            |   1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp    |  58
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp           | 162
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp    | 138
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h      |   6
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp     |  13
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp       |  74
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp         |  11
-rw-r--r--  lib/CodeGen/ShrinkWrap.cpp                          |  12
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp                       |   4
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp        |   6
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp                    |  11
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp           |   7
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp                |   7
47 files changed, 1862 insertions, 1002 deletions
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 87b45c001de4..98163bffb60b 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -767,7 +767,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// If our DISubprogram name is empty, use the mangled name.
if (FuncName.empty())
- FuncName = GlobalValue::getRealLinkageName(GV->getName());
+ FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName());
// Emit a symbol subsection, required by VS2012+ to find function boundaries.
OS.AddComment("Symbol subsection for " + Twine(FuncName));
@@ -888,13 +888,21 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
+ // If the variable has an attached offset expression, extract it.
+ // FIXME: Try to handle DW_OP_deref as well.
+ int64_t ExprOffset = 0;
+ if (VI.Expr)
+ if (!VI.Expr->extractIfOffset(ExprOffset))
+ continue;
+
// Get the frame register used and the offset.
unsigned FrameReg = 0;
int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
// Calculate the label ranges.
- LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset);
+ LocalVarDefRange DefRange =
+ createDefRangeMem(CVReg, FrameOffset + ExprOffset);
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
@@ -2194,7 +2202,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
if (GV->hasComdat()) {
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::getRealLinkageName(GV->getName())));
+ Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
EndLabel = beginCVSubsection(ModuleDebugFragmentKind::Symbols);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
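Note on the CodeViewDebug.cpp hunk above: it folds a plain constant offset carried in the variable's DIExpression into the CodeView def-range, while any more complex expression is still skipped. For example, a slot at frame offset -24 whose expression adds 8 now yields a def-range offset of -16. A minimal sketch of that fast path, assuming extractIfOffset() succeeds only for offset-only expressions (everything not taken verbatim from the hunk is illustrative):

    // Only a plain "base + constant" expression can be folded here.
    int64_t ExprOffset = 0;
    if (VI.Expr && !VI.Expr->extractIfOffset(ExprOffset))
      continue;  // e.g. DW_OP_deref is not handled yet (see the FIXME above)
    // -24 (frame offset) + 8 (expression offset) == -16 in the emitted range.
    LocalVarDefRange DefRange =
        createDefRangeMem(CVReg, FrameOffset + ExprOffset);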
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 1d63e33a4d33..826162ad47c4 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -129,10 +129,9 @@ bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) {
}
void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
- assert(Asm);
PrevInstBB = nullptr;
- if (!hasDebugInfo(MMI, MF)) {
+ if (!Asm || !hasDebugInfo(MMI, MF)) {
skippedNonDebugFunction();
return;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 738e062cb93f..e172712cf889 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -440,7 +440,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP];
+ DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
@@ -634,7 +634,7 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
- DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
+ DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
if (AbsDef)
return;
@@ -696,7 +696,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
- if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) {
+ if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
@@ -708,6 +708,42 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
}
}
+void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) {
+ DbgVariable *AbsVar = getExistingAbstractVariable(
+ InlinedVariable(Var.getVariable(), Var.getInlinedAt()));
+ auto *VariableDie = Var.getDIE();
+ if (AbsVar && AbsVar->getDIE()) {
+ addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+ *AbsVar->getDIE());
+ } else
+ applyVariableAttributes(Var, *VariableDie);
+}
+
+DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) {
+ const DILocalVariable *Cleansed;
+ return getExistingAbstractVariable(IV, Cleansed);
+}
+
+// Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(
+ InlinedVariable IV, const DILocalVariable *&Cleansed) {
+ // More then one inlined variable corresponds to one abstract variable.
+ Cleansed = IV.first;
+ auto &AbstractVariables = getAbstractVariables();
+ auto I = AbstractVariables.find(Cleansed);
+ if (I != AbstractVariables.end())
+ return I->second.get();
+ return nullptr;
+}
+
+void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->isAbstractScope());
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
+ DU->addScopeVariable(Scope, AbsDbgVariable.get());
+ getAbstractVariables()[Var] = std::move(AbsDbgVariable);
+}
+
void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
if (!Skeleton) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 20a415150b4d..77e9e671529f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -68,6 +68,9 @@ class DwarfCompileUnit final : public DwarfUnit {
// ranges/locs.
const MCSymbol *BaseAddress;
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+
/// \brief Construct a DIE for the given DbgVariable without initializing the
/// DbgVariable's DIE reference.
DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);
@@ -76,6 +79,18 @@ class DwarfCompileUnit final : public DwarfUnit {
bool includeMinimalInlineScopes() const;
+ DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractSPDies;
+ return DU->getAbstractSPDies();
+ }
+
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return AbstractVariables;
+ return DU->getAbstractVariables();
+ }
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
@@ -189,6 +204,13 @@ public:
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
void finishSubprogramDefinition(const DISubprogram *SP);
+ void finishVariableDefinition(const DbgVariable &Var);
+ /// Find abstract variable associated with Var.
+ typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
+ const DILocalVariable *&Cleansed);
+ DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
+ void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 6f442f5c3172..3410b98d7776 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -71,6 +71,10 @@ static cl::opt<bool> GenerateARangeSection("generate-arange-section",
cl::desc("Generate dwarf aranges"),
cl::init(false));
+static cl::opt<bool> SplitDwarfCrossCuReferences(
+ "split-dwarf-cross-cu-references", cl::Hidden,
+ cl::desc("Enable cross-cu references in DWO files"), cl::init(false));
+
namespace {
enum DefaultOnOff { Default, Enable, Disable };
}
@@ -362,21 +366,29 @@ template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(*SkelCU);
}
-void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+bool DwarfDebug::shareAcrossDWOCUs() const {
+ return SplitDwarfCrossCuReferences;
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
+ LexicalScope *Scope) {
assert(Scope && Scope->getScopeNode());
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
- ProcessedSPNodes.insert(SP);
-
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
auto &CU = *CUMap.lookup(SP->getUnit());
- forBothCUs(CU, [&](DwarfCompileUnit &CU) {
+ if (auto *SkelCU = CU.getSkeleton()) {
+ (shareAcrossDWOCUs() ? CU : SrcCU)
+ .constructAbstractSubprogramScopeDIE(Scope);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructAbstractSubprogramScopeDIE(Scope);
+ } else {
CU.constructAbstractSubprogramScopeDIE(Scope);
- });
+ }
}
void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
@@ -564,13 +576,7 @@ void DwarfDebug::finishVariableDefinitions() {
// DIE::getUnit isn't simple - it walks parent pointers, etc.
DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
assert(Unit);
- DbgVariable *AbsVar = getExistingAbstractVariable(
- InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
- if (AbsVar && AbsVar->getDIE()) {
- Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
- *AbsVar->getDIE());
- } else
- Unit->applyVariableAttributes(*Var, *VariableDie);
+ Unit->finishVariableDefinition(*Var);
}
}
@@ -718,58 +724,32 @@ void DwarfDebug::endModule() {
}
// clean up.
- AbstractVariables.clear();
+ // FIXME: AbstractVariables.clear();
}
-// Find abstract variable, if any, associated with Var.
-DbgVariable *
-DwarfDebug::getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed) {
- // More then one inlined variable corresponds to one abstract variable.
- Cleansed = IV.first;
- auto I = AbstractVariables.find(Cleansed);
- if (I != AbstractVariables.end())
- return I->second.get();
- return nullptr;
-}
-
-DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
- const DILocalVariable *Cleansed;
- return getExistingAbstractVariable(IV, Cleansed);
-}
-
-void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
- LexicalScope *Scope) {
- assert(Scope && Scope->isAbstractScope());
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
- InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
- AbstractVariables[Var] = std::move(AbsDbgVariable);
-}
-
-void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV,
+void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV,
const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
- if (getExistingAbstractVariable(IV, Cleansed))
+ if (CU.getExistingAbstractVariable(IV, Cleansed))
return;
- createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
+ CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(
cast<DILocalScope>(ScopeNode)));
}
-void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
+void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU,
InlinedVariable IV, const MDNode *ScopeNode) {
const DILocalVariable *Cleansed = nullptr;
- if (getExistingAbstractVariable(IV, Cleansed))
+ if (CU.getExistingAbstractVariable(IV, Cleansed))
return;
if (LexicalScope *Scope =
LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode)))
- createAbstractVariable(Cleansed, Scope);
+ CU.createAbstractVariable(Cleansed, Scope);
}
-
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
- DenseSet<InlinedVariable> &Processed) {
+ DwarfCompileUnit &TheCU, DenseSet<InlinedVariable> &Processed) {
for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
@@ -784,7 +764,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
if (!Scope)
continue;
- ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode());
auto RegVar = make_unique<DbgVariable>(Var.first, Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (InfoHolder.addScopeVariable(Scope, RegVar.get()))
@@ -955,9 +935,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
}
}
-DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope,
+DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope,
InlinedVariable IV) {
- ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode());
+ ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode());
ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second));
InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get());
return ConcreteVariables.back().get();
@@ -980,7 +961,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMFTable(Processed);
+ collectVariableInfoFromMFTable(TheCU, Processed);
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
@@ -1002,7 +983,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
continue;
Processed.insert(IV);
- DbgVariable *RegVar = createConcreteVariable(*Scope, IV);
+ DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
@@ -1038,7 +1019,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
for (const DILocalVariable *DV : SP->getVariables()) {
if (Processed.insert(InlinedVariable(DV, nullptr)).second)
if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope()))
- createConcreteVariable(*Scope, InlinedVariable(DV, nullptr));
+ createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr));
}
}
@@ -1229,12 +1210,12 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
for (const DILocalVariable *DV : SP->getVariables()) {
if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second)
continue;
- ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr),
+ ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr),
DV->getScope());
assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
&& "ensureAbstractVariableIsCreated inserted abstract scopes");
}
- constructAbstractSubprogramScopeDIE(AScope);
+ constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
ProcessedSPNodes.insert(SP);
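Note on the DwarfDebug.cpp changes above: sharing abstract subprogram and variable DIEs across CUs in .dwo output is now opt-in, gated by the hidden -split-dwarf-cross-cu-references option; shareAcrossDWOCUs() simply returns that flag. A hedged usage sketch only (the input name and the -split-dwarf-file option are assumptions, not part of this commit):

    llc -split-dwarf-file=foo.dwo -split-dwarf-cross-cu-references foo.ll -filetype=obj -o foo.o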
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8a96e7867b6e..b9c5aa9ffb23 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -210,7 +210,6 @@ class DwarfDebug : public DebugHandlerBase {
DenseMap<const MCSymbol *, uint64_t> SymSize;
/// Collection of abstract variables.
- DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
/// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
@@ -313,20 +312,16 @@ class DwarfDebug : public DebugHandlerBase {
typedef DbgValueHistoryMap::InlinedVariable InlinedVariable;
- /// Find abstract variable associated with Var.
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV,
- const DILocalVariable *&Cleansed);
- DbgVariable *getExistingAbstractVariable(InlinedVariable IV);
- void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope);
- void ensureAbstractVariableIsCreated(InlinedVariable Var,
+ void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var,
const MDNode *Scope);
- void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var,
+ void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var,
const MDNode *Scope);
- DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV);
+ DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU,
+ LexicalScope &Scope, InlinedVariable IV);
/// Construct a DIE for this abstract scope.
- void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
void finishVariableDefinitions();
@@ -446,7 +441,8 @@ class DwarfDebug : public DebugHandlerBase {
const DbgValueHistoryMap::InstrRanges &Ranges);
/// Collect variable information from the side table maintained by MF.
- void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
+ void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
+ DenseSet<InlinedVariable> &P);
protected:
/// Gather pre-function debug information.
@@ -518,6 +514,8 @@ public:
/// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; }
+ bool shareAcrossDWOCUs() const;
+
/// Returns the Dwarf Version.
uint16_t getDwarfVersion() const;
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index d4d2ed277274..54924e9806ed 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -53,6 +53,7 @@ class DwarfFile {
// Collection of abstract subprogram DIEs.
DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
/// be shared across CUs, that is why we keep the map here instead
@@ -105,6 +106,9 @@ public:
DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
return AbstractSPDies;
}
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> &getAbstractVariables() {
+ return AbstractVariables;
+ }
void insertDIE(const MDNode *TypeMD, DIE *Die) {
DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 8d25def7772c..667afbb450bd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -173,7 +173,7 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
}
/// Check whether the DIE for this MDNode can be shared across CUs.
-static bool isShareableAcrossCUs(const DINode *D) {
+bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// When the MDNode can be part of the type system, the DIE can be shared
// across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
@@ -181,6 +181,8 @@ static bool isShareableAcrossCUs(const DINode *D) {
// level already) but may be implementable for some value in projects
// building multiple independent libraries with LTO and then linking those
// together.
+ if (isDwoUnit() && !DD->shareAcrossDWOCUs())
+ return false;
return (isa<DIType>(D) ||
(isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
!GenerateDwarfTypeUnits;
@@ -645,7 +647,7 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
addString(Die,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ GlobalValue::dropLLVMManglingEscape(LinkageName));
}
void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8fc841703e23..7acad2cbd89f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -65,7 +65,7 @@ public:
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
- class DwarfUnit : public DIEUnit {
+class DwarfUnit : public DIEUnit {
protected:
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -103,6 +103,9 @@ protected:
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
+ bool shareAcrossDWOCUs() const;
+ bool isShareableAcrossCUs(const DINode *D) const;
+
public:
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 704f0ac2f191..815658bfb637 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -101,7 +101,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
// functions may still refer to it.
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
StringRef FLinkageName =
- GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
}
shouldEmitLSDA = hasEHFunclets;
@@ -174,7 +174,7 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
// their funclet entry block's number.
const MachineFunction *MF = MBB->getParent();
const Function *F = MF->getFunction();
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
MCContext &Ctx = MF->getContext();
StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
@@ -252,7 +252,7 @@ void WinException::endFunclet() {
!CurrentFuncletEntry->isCleanupFuncletEntry()) {
// If this is a C++ catch funclet (or the parent function),
// emit a reference to the LSDA for the parent function.
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
@@ -536,7 +536,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
// Emit a label assignment with the SEH frame offset so we can use it for
// llvm.x86.seh.recoverfp.
StringRef FLinkageName =
- GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName());
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
const MCExpr *MCOffset =
@@ -635,7 +635,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
auto &OS = *Asm->OutStreamer;
const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
- StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable;
MCSymbol *FuncInfoXData = nullptr;
@@ -942,7 +942,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
MCStreamer &OS = *Asm->OutStreamer;
const Function *F = MF->getFunction();
- StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName());
+ StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName());
bool VerboseAsm = OS.isVerboseAsm();
auto AddComment = [&](const Twine &Comment) {
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 9c19a4fd3c3e..17e6be05eb42 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -47,8 +47,7 @@ namespace {
bool runOnFunction(Function &F) override;
private:
- bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
- bool IsStore, bool IsLoad);
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
bool tryExpandAtomicLoad(LoadInst *LI);
@@ -224,22 +223,16 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (TLI->shouldInsertFencesForAtomic(I)) {
auto FenceOrdering = AtomicOrdering::Monotonic;
- bool IsStore, IsLoad;
if (LI && isAcquireOrStronger(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = false;
- IsLoad = true;
} else if (SI && isReleaseOrStronger(SI->getOrdering())) {
FenceOrdering = SI->getOrdering();
SI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = true;
- IsLoad = false;
} else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
isAcquireOrStronger(RMWI->getOrdering()))) {
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(AtomicOrdering::Monotonic);
- IsStore = IsLoad = true;
} else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
isAcquireOrStronger(CASI->getSuccessOrdering()))) {
@@ -250,11 +243,10 @@ bool AtomicExpand::runOnFunction(Function &F) {
FenceOrdering = CASI->getSuccessOrdering();
CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
CASI->setFailureOrdering(AtomicOrdering::Monotonic);
- IsStore = IsLoad = true;
}
if (FenceOrdering != AtomicOrdering::Monotonic) {
- MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ MadeChange |= bracketInstWithFences(I, FenceOrdering);
}
}
@@ -320,13 +312,12 @@ bool AtomicExpand::runOnFunction(Function &F) {
return MadeChange;
}
-bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
- bool IsStore, bool IsLoad) {
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
IRBuilder<> Builder(I);
- auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
- auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
@@ -1048,8 +1039,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1064,8 +1054,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitLeadingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
@@ -1094,8 +1083,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// necessary.
Builder.SetInsertPoint(SuccessBB);
if (ShouldInsertFencesForAtomic)
- TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitTrailingFence(Builder, CI, SuccessOrder);
Builder.CreateBr(ExitBB);
Builder.SetInsertPoint(NoStoreBB);
@@ -1107,8 +1095,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.SetInsertPoint(FailureBB);
if (ShouldInsertFencesForAtomic)
- TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
+ TLI->emitTrailingFence(Builder, CI, FailureOrder);
Builder.CreateBr(ExitBB);
// Finally, we have control-flow based knowledge of whether the cmpxchg
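Note on the AtomicExpandPass.cpp hunks above: the old IsStore/IsLoad flags are replaced by passing the atomic instruction itself to emitLeadingFence/emitTrailingFence, so targets can inspect the operation when choosing a barrier. A hedged sketch of what a target override of the new hook shape might look like (MyTargetLowering and the policy are illustrative; the real declaration lives in TargetLowering.h, which this diff only calls into):

    Instruction *MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
      // With the instruction in hand there is no need for separate flags:
      // stores and RMW/cmpxchg operations at release or stronger get a
      // leading release fence, plain acquire loads get none at all.
      if (isReleaseOrStronger(Ord) &&
          (isa<StoreInst>(Inst) || isa<AtomicRMWInst>(Inst) ||
           isa<AtomicCmpXchgInst>(Inst)))
        return Builder.CreateFence(AtomicOrdering::Release);
      return nullptr;  // e.g. acquire loads rely on emitTrailingFence instead
    }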
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 26da748fa244..55a27e2fb79e 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
+ ExpandReductions.cpp
FaultMaps.cpp
FEntryInserter.cpp
FuncletLayout.cpp
@@ -48,6 +49,7 @@ add_llvm_library(LLVMCodeGen
LivePhysRegs.cpp
LiveRangeCalc.cpp
LiveRangeEdit.cpp
+ LiveRangeShrink.cpp
LiveRegMatrix.cpp
LiveRegUnits.cpp
LiveStackAnalysis.cpp
@@ -118,6 +120,7 @@ add_llvm_library(LLVMCodeGen
SafeStack.cpp
SafeStackColoring.cpp
SafeStackLayout.cpp
+ ScalarizeMaskedMemIntrin.cpp
ScheduleDAG.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 3fc12ccc3b60..4d30c6574b12 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -43,6 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
+ initializeLiveRangeShrinkPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
@@ -79,7 +80,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
- initializeSafeStackPass(Registry);
+ initializeSafeStackLegacyPassPass(Registry);
+ initializeScalarizeMaskedMemIntrinPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c6c93811a0f9..f2e024c5e3bd 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -295,7 +295,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
else if (PSI->isFunctionColdInCallGraph(&F))
- F.setSectionPrefix(".cold");
+ F.setSectionPrefix(".unlikely");
}
/// This optimization identifies DIV instructions that can be
@@ -1549,519 +1549,6 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
return MadeChange;
}
-// Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask, <16 x i32> %passthru)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.load, label %else
-//
-//cond.load: ; preds = %0
-// %4 = getelementptr i32* %1, i32 0
-// %5 = load i32* %4
-// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
-// br label %else
-//
-//else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
-// %7 = extractelement <16 x i1> %mask, i32 1
-// %8 = icmp eq i1 %7, true
-// br i1 %8, label %cond.load1, label %else2
-//
-//cond.load1: ; preds = %else
-// %9 = getelementptr i32* %1, i32 1
-// %10 = load i32* %9
-// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
-// br label %else2
-//
-//else2: ; preds = %else, %cond.load1
-// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
-// %12 = extractelement <16 x i1> %mask, i32 2
-// %13 = icmp eq i1 %12, true
-// br i1 %13, label %cond.load4, label %else5
-//
-static void scalarizeMaskedLoad(CallInst *CI) {
- Value *Ptr = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
- assert(VecType && "Unexpected return type of masked load intrinsic");
-
- Type *EltTy = CI->getType()->getVectorElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
-
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- bool IsAllOnesMask = isa<Constant>(Mask) &&
- cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
- Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
- // Bitcast %addr fron i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
-
- Value *UndefVal = UndefValue::get(VecType);
-
- // The result vector
- Value *VResult = UndefVal;
-
- if (isa<ConstantVector>(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx));
- }
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_load = icmp eq i1 %mask_1, true
- // br i1 %to_load, label %cond.load, label %else
- //
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
-
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
- }
-
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
-}
-
-// Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
-// <16 x i1> %mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set
-//
-// %1 = bitcast i8* %addr to i32*
-// %2 = extractelement <16 x i1> %mask, i32 0
-// %3 = icmp eq i1 %2, true
-// br i1 %3, label %cond.store, label %else
-//
-// cond.store: ; preds = %0
-// %4 = extractelement <16 x i32> %val, i32 0
-// %5 = getelementptr i32* %1, i32 0
-// store i32 %4, i32* %5
-// br label %else
-//
-// else: ; preds = %0, %cond.store
-// %6 = extractelement <16 x i1> %mask, i32 1
-// %7 = icmp eq i1 %6, true
-// br i1 %7, label %cond.store1, label %else2
-//
-// cond.store1: ; preds = %else
-// %8 = extractelement <16 x i32> %val, i32 1
-// %9 = getelementptr i32* %1, i32 1
-// store i32 %8, i32* %9
-// br label %else2
-// . . .
-static void scalarizeMaskedStore(CallInst *CI) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptr = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- VectorType *VecType = dyn_cast<VectorType>(Src->getType());
- assert(VecType && "Unexpected data type in masked store intrinsic");
-
- Type *EltTy = VecType->getElementType();
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- // Short-cut if the mask is all-true.
- bool IsAllOnesMask = isa<Constant>(Mask) &&
- cast<Constant>(Mask)->isAllOnesValue();
-
- if (IsAllOnesMask) {
- Builder.CreateAlignedStore(Src, Ptr, AlignVal);
- CI->eraseFromParent();
- return;
- }
-
- // Adjust alignment for the scalar instruction.
- AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8);
- // Bitcast %addr fron i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
- unsigned VectorWidth = VecType->getNumElements();
-
- if (isa<ConstantVector>(Mask)) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
- // %to_store = icmp eq i1 %mask_1, true
- // br i1 %to_store, label %cond.store, label %else
- //
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1));
-
- // Create "cond" block
- //
- // %OneElt = extractelement <16 x i32> %Src, i32 Idx
- // %EltAddr = getelementptr i32* %1, i32 0
- // %store i32 %OneElt, i32* %EltAddr
- //
- BasicBlock *CondBlock =
- IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock =
- CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-}
-
-// Translate a masked gather intrinsic like
-// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
-// <16 x i1> %Mask, <16 x i32> %Src)
-// to a chain of basic blocks, with loading element one-by-one if
-// the appropriate mask bit is set
-//
-// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> %Mask, i32 0
-// % ToLoad0 = icmp eq i1 % Mask0, true
-// br i1 % ToLoad0, label %cond.load, label %else
-//
-// cond.load:
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// % Load0 = load i32, i32* % Ptr0, align 4
-// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
-// br label %else
-//
-// else:
-// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
-// % Mask1 = extractelement <16 x i1> %Mask, i32 1
-// % ToLoad1 = icmp eq i1 % Mask1, true
-// br i1 % ToLoad1, label %cond.load1, label %else2
-//
-// cond.load1:
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// % Load1 = load i32, i32* % Ptr1, align 4
-// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
-// br label %else2
-// . . .
-// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
-// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI) {
- Value *Ptrs = CI->getArgOperand(0);
- Value *Alignment = CI->getArgOperand(1);
- Value *Mask = CI->getArgOperand(2);
- Value *Src0 = CI->getArgOperand(3);
-
- VectorType *VecType = dyn_cast<VectorType>(CI->getType());
-
- assert(VecType && "Unexpected return type of masked load intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- BasicBlock *CondBlock = nullptr;
- BasicBlock *PrevIfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
-
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- Value *UndefVal = UndefValue::get(VecType);
-
- // The result vector
- Value *VResult = UndefVal;
- unsigned VectorWidth = VecType->getNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
- "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx),
- "Res" + Twine(Idx));
- }
- Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- PHINode *Phi = nullptr;
- Value *PrevPhi = UndefVal;
-
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-
- // Fill the "else" block, created in the previous iteration
- //
- // %Mask1 = extractelement <16 x i1> %Mask, i32 1
- // %ToLoad1 = icmp eq i1 %Mask1, true
- // br i1 %ToLoad1, label %cond.load, label %else
- //
- if (Idx > 0) {
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- PrevPhi = Phi;
- VResult = Phi;
- }
-
- Value *Predicate = Builder.CreateExtractElement(Mask,
- Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
- Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToLoad" + Twine(Idx));
-
- // Create "cond" block
- //
- // %EltAddr = getelementptr i32* %1, i32 0
- // %Elt = load i32* %EltAddr
- // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
- //
- CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
- Builder.SetInsertPoint(InsertPt);
-
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal,
- "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
- "Res" + Twine(Idx));
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- PrevIfBlock = IfBlock;
- IfBlock = NewIfBlock;
- }
-
- Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
- Phi->addIncoming(VResult, CondBlock);
- Phi->addIncoming(PrevPhi, PrevIfBlock);
- Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
-}
-
-// Translate a masked scatter intrinsic, like
-// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
-// <16 x i1> %Mask)
-// to a chain of basic blocks, that stores element one-by-one if
-// the appropriate mask bit is set.
-//
-// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
-// % Mask0 = extractelement <16 x i1> % Mask, i32 0
-// % ToStore0 = icmp eq i1 % Mask0, true
-// br i1 %ToStore0, label %cond.store, label %else
-//
-// cond.store:
-// % Elt0 = extractelement <16 x i32> %Src, i32 0
-// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
-// store i32 %Elt0, i32* % Ptr0, align 4
-// br label %else
-//
-// else:
-// % Mask1 = extractelement <16 x i1> % Mask, i32 1
-// % ToStore1 = icmp eq i1 % Mask1, true
-// br i1 % ToStore1, label %cond.store1, label %else2
-//
-// cond.store1:
-// % Elt1 = extractelement <16 x i32> %Src, i32 1
-// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
-// store i32 % Elt1, i32* % Ptr1, align 4
-// br label %else2
-// . . .
-static void scalarizeMaskedScatter(CallInst *CI) {
- Value *Src = CI->getArgOperand(0);
- Value *Ptrs = CI->getArgOperand(1);
- Value *Alignment = CI->getArgOperand(2);
- Value *Mask = CI->getArgOperand(3);
-
- assert(isa<VectorType>(Src->getType()) &&
- "Unexpected data type in masked scatter intrinsic");
- assert(isa<VectorType>(Ptrs->getType()) &&
- isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
- "Vector of pointers is expected in masked scatter intrinsic");
-
- IRBuilder<> Builder(CI->getContext());
- Instruction *InsertPt = CI;
- BasicBlock *IfBlock = CI->getParent();
- Builder.SetInsertPoint(InsertPt);
- Builder.SetCurrentDebugLocation(CI->getDebugLoc());
-
- unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
- unsigned VectorWidth = Src->getType()->getVectorNumElements();
-
- // Shorten the way if the mask is a vector of constants.
- bool IsConstMask = isa<ConstantVector>(Mask);
-
- if (IsConstMask) {
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
- continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
- }
- CI->eraseFromParent();
- return;
- }
- for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- // Fill the "else" block, created in the previous iteration
- //
- // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
- // % ToStore = icmp eq i1 % Mask1, true
- // br i1 % ToStore, label %cond.store, label %else
- //
- Value *Predicate = Builder.CreateExtractElement(Mask,
- Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
- Value *Cmp =
- Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
- ConstantInt::get(Predicate->getType(), 1),
- "ToStore" + Twine(Idx));
-
- // Create "cond" block
- //
- // % Elt1 = extractelement <16 x i32> %Src, i32 1
- // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
- // %store i32 % Elt1, i32* % Ptr1
- //
- BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
- Builder.SetInsertPoint(InsertPt);
-
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
- Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
-
- // Create "else" block, fill it in the next iteration
- BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
- Builder.SetInsertPoint(InsertPt);
- Instruction *OldBr = IfBlock->getTerminator();
- BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
- OldBr->eraseFromParent();
- IfBlock = NewIfBlock;
- }
- CI->eraseFromParent();
-}
-
/// If counting leading or trailing zeros is an expensive operation and a zero
/// input is defined, add a check for zero to avoid calling the intrinsic.
///
@@ -2242,39 +1729,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
return true;
}
- case Intrinsic::masked_load: {
- // Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType())) {
- scalarizeMaskedLoad(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_store: {
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedStore(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_gather: {
- if (!TTI->isLegalMaskedGather(CI->getType())) {
- scalarizeMaskedGather(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
- case Intrinsic::masked_scatter: {
- if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedScatter(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
- }
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
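Note on the large removal from CodeGenPrepare.cpp above: the masked load/store/gather/scatter scalarization is not dropped; per the diffstat and the CMakeLists.txt/CodeGen.cpp hunks it moves into the new ScalarizeMaskedMemIntrin pass. The gating idea stays the same: an intrinsic is only rewritten into the branch-per-lane form when the target reports it as not legal. A minimal sketch of that per-intrinsic decision, assuming the new pass keeps the same TTI queries used in the removed code:

    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      if (TTI->isLegalMaskedLoad(II->getType()))
        return false;           // target handles it natively, keep the intrinsic
      scalarizeMaskedLoad(II);  // otherwise expand into a chain of cond.load blocks
      return true;
    case Intrinsic::masked_store:
      if (TTI->isLegalMaskedStore(II->getArgOperand(0)->getType()))
        return false;
      scalarizeMaskedStore(II);
      return true;
    // masked_gather / masked_scatter follow the same pattern via
    // isLegalMaskedGather / isLegalMaskedScatter.
    default:
      return false;
    }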
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index ab2382e2db6d..e860906043dd 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -142,8 +142,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
MachineOperand &DstMO = MI->getOperand(0);
MachineOperand &SrcMO = MI->getOperand(1);
- if (SrcMO.getReg() == DstMO.getReg()) {
- DEBUG(dbgs() << "identity copy: " << *MI);
+ bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
+ if (IdentityCopy || SrcMO.isUndef()) {
+ DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI);
// No need to insert an identity copy instruction, but replace with a KILL
// if liveness is changed.
if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
new file mode 100644
index 000000000000..a40ea28056dd
--- /dev/null
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -0,0 +1,167 @@
+//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for reduction intrinsics, allowing targets
+// to enable the experimental intrinsics until just before codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+unsigned getOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ return Instruction::FAdd;
+ case Intrinsic::experimental_vector_reduce_fmul:
+ return Instruction::FMul;
+ case Intrinsic::experimental_vector_reduce_add:
+ return Instruction::Add;
+ case Intrinsic::experimental_vector_reduce_mul:
+ return Instruction::Mul;
+ case Intrinsic::experimental_vector_reduce_and:
+ return Instruction::And;
+ case Intrinsic::experimental_vector_reduce_or:
+ return Instruction::Or;
+ case Intrinsic::experimental_vector_reduce_xor:
+ return Instruction::Xor;
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ return Instruction::ICmp;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unexpected ID");
+ }
+}
+
+RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_smax:
+ return RecurrenceDescriptor::MRK_SIntMax;
+ case Intrinsic::experimental_vector_reduce_smin:
+ return RecurrenceDescriptor::MRK_SIntMin;
+ case Intrinsic::experimental_vector_reduce_umax:
+ return RecurrenceDescriptor::MRK_UIntMax;
+ case Intrinsic::experimental_vector_reduce_umin:
+ return RecurrenceDescriptor::MRK_UIntMin;
+ case Intrinsic::experimental_vector_reduce_fmax:
+ return RecurrenceDescriptor::MRK_FloatMax;
+ case Intrinsic::experimental_vector_reduce_fmin:
+ return RecurrenceDescriptor::MRK_FloatMin;
+ default:
+ return RecurrenceDescriptor::MRK_Invalid;
+ }
+}
+
+bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
+ bool Changed = false;
+ SmallVector<IntrinsicInst*, 4> Worklist;
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+ if (auto II = dyn_cast<IntrinsicInst>(&*I))
+ Worklist.push_back(II);
+
+ for (auto *II : Worklist) {
+ IRBuilder<> Builder(II);
+ Value *Vec = nullptr;
+ auto ID = II->getIntrinsicID();
+ auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ switch (ID) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_fmul:
+ // FMFs must be attached to the call, otherwise it's an ordered reduction
+ // and it can't be handled by generating this shuffle sequence.
+ // TODO: Implement scalarization of ordered reductions here for targets
+ // without native support.
+ if (!II->getFastMathFlags().unsafeAlgebra())
+ continue;
+ Vec = II->getArgOperand(1);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ Vec = II->getArgOperand(0);
+ MRK = getMRK(ID);
+ break;
+ default:
+ continue;
+ }
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+ auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ II->replaceAllUsesWith(Rdx);
+ II->eraseFromParent();
+ Changed = true;
+ }
+ return Changed;
+}
+
+class ExpandReductions : public FunctionPass {
+public:
+ static char ID;
+ ExpandReductions() : FunctionPass(ID) {
+ initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override {
+ const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return expandReductions(F, TTI);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char ExpandReductions::ID;
+INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
+ "Expand reduction intrinsics", false, false)
+
+FunctionPass *llvm::createExpandReductionsPass() {
+ return new ExpandReductions();
+}
+
+PreservedAnalyses ExpandReductionsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ if (!expandReductions(F, &TTI))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index eaf4056e47ea..4d4591042296 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -162,7 +162,7 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
return std::get<0>(getAction(MI, MRI)) == Legal;
}
-LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
+Optional<LLT> LegalizerInfo::findLegalType(const InstrAspect &Aspect,
LegalizeAction Action) const {
switch(Action) {
default:
@@ -174,20 +174,20 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
return Aspect.Type;
case NarrowScalar: {
return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ [](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
}
case WidenScalar: {
- return findLegalType(Aspect, [&](LLT Ty) -> LLT {
+ return findLegalType(Aspect, [](LLT Ty) -> LLT {
return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
});
}
case FewerElements: {
return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+ [](LLT Ty) -> LLT { return Ty.halfElements(); });
}
case MoreElements: {
return findLegalType(Aspect,
- [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ [](LLT Ty) -> LLT { return Ty.doubleElements(); });
}
}
}
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 7248f50945d0..2eb3cdee694d 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -204,12 +204,8 @@ uint64_t RegBankSelect::getRepairCost(
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != UINT_MAX)
return Cost;
- assert(!TPC->isGlobalISelAbortEnabled() &&
- "Legalization not available yet");
// Return the legalization cost of that repairing.
}
- assert(!TPC->isGlobalISelAbortEnabled() &&
- "Complex repairing not implemented yet");
return UINT_MAX;
}
@@ -452,6 +448,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// Sums up the repairing cost of MO at each insertion point.
uint64_t RepairCost = getRepairCost(MO, ValMapping);
+
+ // This is an impossible-to-repair cost.
+ if (RepairCost == UINT_MAX)
+ continue;
+
// Bias used for splitting: 5%.
const uint64_t PercentageForBias = 5;
uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 3c93f8123b0d..254bdf10d804 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -110,3 +110,11 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
return None;
}
+
+const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (TargetOpcode::G_FCONSTANT != MI->getOpcode())
+ return nullptr;
+ return MI->getOperand(1).getFPImm();
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 37fe41582333..628d599a3cc7 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1318,7 +1318,8 @@ static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) {
return false;
PI = I++;
}
- return true;
+ // Finally see if the last I is indeed a successor to PI.
+ return PI->isSuccessor(&*I);
}
/// Invalidate predecessor BB info so it would be re-analyzed to determine if it
@@ -1587,22 +1588,32 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
+ // To be able to insert code freely at the end of BBI we sometimes remove
+ // the branch from BBI to NextMBB temporarily. Remember if this happened.
+ bool RemovedBranchToNextMBB = false;
if (CvtMBB.pred_size() > 1) {
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
- // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
- // explicitly remove CvtBBI as a successor.
+ // Keep the CFG updated.
BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
- // Now merge the entry of the triangle with the true block.
+ // Remove the branch from the entry of the triangle to NextMBB to be able to
+ // do the merge below. Keep the CFG updated, but remember we removed the
+ // branch since we do want to execute NextMBB, either by introducing a
+ // branch to it again, or merging it into the entry block.
+ // How it's handled is decided further down.
BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
+ BBI.BB->removeSuccessor(&NextMBB, true);
+ RemovedBranchToNextMBB = true;
+
+ // Now merge the entry of the triangle with the true block.
MergeBlocks(BBI, *CvtBBI, false);
}
@@ -1640,12 +1651,19 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
+ // We might have removed BBI from NextMBB's predecessor list above but
+ // we want it to be there, so consider that too.
+ (NextMBB.pred_size() == (RemovedBranchToNextMBB ? 0 : 1)) &&
+ !NextBBI->HasFallThrough &&
!NextMBB.hasAddressTaken()) {
+ // We will merge NextBBI into BBI, and thus remove the current
+ // fallthrough from BBI into CvtBBI.
+ BBI.BB->removeSuccessor(&CvtMBB, true);
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
InsertUncondBranch(*BBI.BB, NextMBB, TII);
+ BBI.BB->addSuccessor(&NextMBB);
BBI.HasFallThrough = false;
}
// Mixed predicated and unpredicated code. This cannot be iteratively
@@ -1653,8 +1671,6 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
IterIfcvt = false;
}
- RemoveExtraEdges(BBI);
-
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
new file mode 100644
index 000000000000..00182e2c779f
--- /dev/null
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -0,0 +1,211 @@
+//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+///===---------------------------------------------------------------------===//
+///
+/// \file
+/// This pass moves instructions close to the definitions of their operands to
+/// shrink the live ranges of those definitions. The code motion is limited to
+/// within a single basic block. The moved instruction should have exactly one
+/// def and more than one use operand, each of which is the only use of its def.
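+///
+/// For illustration (approximate pseudo machine code, not tied to a target):
+///   %t1 = LOAD ...
+///   ...                 ; many unrelated instructions
+///   %t3 = ADD %t1, %t2  ; the only use of %t1 and of %t2
+/// the ADD is hoisted up next to the later of the defs of %t1 and %t2, which
+/// shrinks the live ranges of both defs.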
+///
+///===---------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "lrshrink"
+
+STATISTIC(NumInstrsHoistedToShrinkLiveRange,
+ "Number of insructions hoisted to shrink live range.");
+
+using namespace llvm;
+
+namespace {
+class LiveRangeShrink : public MachineFunctionPass {
+public:
+ static char ID;
+
+ LiveRangeShrink() : MachineFunctionPass(ID) {
+ initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Live Range Shrink"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char LiveRangeShrink::ID = 0;
+char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID;
+
+INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false,
+ false)
+namespace {
+typedef DenseMap<MachineInstr *, unsigned> InstOrderMap;
+
+/// Returns \p New if it is dominated by \p Old, otherwise returns \p Old.
+/// \p M maps each instruction to its dominating order, such that
+/// M[A] > M[B] guarantees that A is dominated by B.
+/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return
+/// \p New.
+MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old,
+ const InstOrderMap &M) {
+ auto NewIter = M.find(&New);
+ if (NewIter == M.end())
+ return Old;
+ if (Old == nullptr)
+ return &New;
+ unsigned OrderOld = M.find(Old)->second;
+ unsigned OrderNew = NewIter->second;
+ if (OrderOld != OrderNew)
+ return OrderOld < OrderNew ? &New : Old;
+ // OrderOld == OrderNew, so iterate down from Old to see whether it can
+ // reach New; if it does, New is dominated by Old.
+ for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew;
+ I = I->getNextNode())
+ if (I == &New)
+ return &New;
+ return Old;
+}
+
+/// Builds the instruction-to-dominating-order-number map \p M by traversing
+/// from instruction \p Start.
+void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) {
+ M.clear();
+ unsigned i = 0;
+ for (MachineInstr &I : make_range(Start, Start->getParent()->end()))
+ M[&I] = i++;
+}
+} // end anonymous namespace
+
+bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ InstOrderMap IOM;
+ // Map from register to instruction order (value of IOM) where the
+ // register is used last. When moving an instruction up, we need to make
+ // sure none of its defs (including dead defs) cross that last use.
+ DenseMap<unsigned, unsigned> UseMap;
+
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.empty())
+ continue;
+ bool SawStore = false;
+ BuildInstOrderMap(MBB.begin(), IOM);
+ UseMap.clear();
+
+ for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) {
+ MachineInstr &MI = *Next;
+ ++Next;
+ if (MI.isPHI() || MI.isDebugValue())
+ continue;
+ if (MI.mayStore())
+ SawStore = true;
+
+ unsigned CurrentOrder = IOM[&MI];
+ unsigned Barrier = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDebug())
+ continue;
+ if (MO.isUse())
+ UseMap[MO.getReg()] = CurrentOrder;
+ else if (MO.isDead() && UseMap.count(MO.getReg()))
+ // Barrier is the last instruction where MO gets used. MI should not
+ // be moved above Barrier.
+ Barrier = std::max(Barrier, UseMap[MO.getReg()]);
+ }
+
+ if (!MI.isSafeToMove(nullptr, SawStore)) {
+ // If MI has side effects, it should become a barrier for code motion.
+ // IOM is rebuilt from the next instruction to prevent later
+ // instructions from being moved before this MI.
+ if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
+ BuildInstOrderMap(Next, IOM);
+ SawStore = false;
+ }
+ continue;
+ }
+
+ const MachineOperand *DefMO = nullptr;
+ MachineInstr *Insert = nullptr;
+
+ // Number of live-ranges that will be shortened. We do not count
+ // live-ranges that are defined by a COPY as it could be coalesced later.
+ unsigned NumEligibleUse = 0;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || MO.isDead() || MO.isDebug())
+ continue;
+ unsigned Reg = MO.getReg();
+ // Do not move the instruction if it def/uses a physical register,
+ // unless it is a constant physical register.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !MRI.isConstantPhysReg(Reg)) {
+ Insert = nullptr;
+ break;
+ }
+ if (MO.isDef()) {
+ // Do not move if there is more than one def.
+ if (DefMO) {
+ Insert = nullptr;
+ break;
+ }
+ DefMO = &MO;
+ } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg)) {
+ MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
+ if (!DefInstr.isCopy())
+ NumEligibleUse++;
+ Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
+ } else {
+ Insert = nullptr;
+ break;
+ }
+ }
+ // Move the instruction when # of shrunk live ranges > 1.
+ if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) {
+ MachineBasicBlock::iterator I = std::next(Insert->getIterator());
+ // Skip all the PHI and debug instructions.
+ while (I != MBB.end() && (I->isPHI() || I->isDebugValue()))
+ I = std::next(I);
+ if (I == MI.getIterator())
+ continue;
+
+ // Update the dominator order to be the same as the insertion point.
+ // We do this to maintain a non-decreasing order without the need to update
+ // all instruction orders after the insertion point.
+ unsigned NewOrder = IOM[&*I];
+ IOM[&MI] = NewOrder;
+ NumInstrsHoistedToShrinkLiveRange++;
+
+ // Find MI's debug value following MI.
+ MachineBasicBlock::iterator EndIter = std::next(MI.getIterator());
+ if (MI.getOperand(0).isReg())
+ for (; EndIter != MBB.end() && EndIter->isDebugValue() &&
+ EndIter->getOperand(0).isReg() &&
+ EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg();
+ ++EndIter, ++Next)
+ IOM[&*EndIter] = NewOrder;
+ MBB.splice(I, &MBB, MI.getIterator(), EndIter);
+ }
+ }
+ }
+ return false;
+}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 3568b0294ad9..a9aec926115a 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -767,7 +767,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
MachineBasicBlock *SuccBB) {
const unsigned NumNew = BB->getNumber();
- SmallSet<unsigned, 16> Defs, Kills;
+ DenseSet<unsigned> Defs, Kills;
MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end();
for (; BBI != BBE && BBI->isPHI(); ++BBI) {
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4cfc128a8c1d..5003115a770f 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -133,6 +133,14 @@ static cl::opt<unsigned> TailDupPlacementThreshold(
"that won't conflict."), cl::init(2),
cl::Hidden);
+// Heuristic for aggressive tail duplication.
+static cl::opt<unsigned> TailDupPlacementAggressiveThreshold(
+ "tail-dup-placement-aggressive-threshold",
+ cl::desc("Instruction cutoff for aggressive tail duplication during "
+ "layout. Used at -O3. Tail merging during layout is forced to "
+ "have a threshold that won't conflict."), cl::init(3),
+ cl::Hidden);
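+
+// For example (assuming the usual cl::opt handling; the flag is hidden but
+// still accepted), the cutoff can be overridden with
+//   llc -O3 -tail-dup-placement-aggressive-threshold=4 foo.ll
+// or from clang via -mllvm -tail-dup-placement-aggressive-threshold=4.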
+
// Heuristic for tail duplication.
static cl::opt<unsigned> TailDupPlacementPenalty(
"tail-dup-placement-penalty",
@@ -2646,9 +2654,26 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
assert(BlockToChain.empty());
assert(ComputedEdges.empty());
+ unsigned TailDupSize = TailDupPlacementThreshold;
+ // If only the aggressive threshold is explicitly set, use it.
+ if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 &&
+ TailDupPlacementThreshold.getNumOccurrences() == 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // For aggressive optimization, we can adjust some thresholds to be less
+ // conservative.
+ if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) {
+ // At O3 we should be more willing to copy blocks for tail duplication. This
+ // increases size pressure, so we only do it at O3.
+ // Do this unless only the regular threshold is explicitly set.
+ if (TailDupPlacementThreshold.getNumOccurrences() == 0 ||
+ TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0)
+ TailDupSize = TailDupPlacementAggressiveThreshold;
+ }
+
if (TailDupPlacement) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- unsigned TailDupSize = TailDupPlacementThreshold;
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2658,7 +2683,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
buildCFGChains();
// Changing the layout can create new tail merging opportunities.
- TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structured CFG.
bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
@@ -2666,7 +2690,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDupPlacementThreshold + 1;
+ unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index bfb2cde030dc..ab433273b189 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -2063,12 +2063,12 @@ void MachineVerifier::verifyStackFrame() {
if (I.getOpcode() == FrameSetupOpcode) {
if (BBState.ExitIsSetup)
report("FrameSetup is after another FrameSetup", &I);
- BBState.ExitValue -= TII->getFrameSize(I);
+ BBState.ExitValue -= TII->getFrameTotalSize(I);
BBState.ExitIsSetup = true;
}
if (I.getOpcode() == FrameDestroyOpcode) {
- int Size = TII->getFrameSize(I);
+ int Size = TII->getFrameTotalSize(I);
if (!BBState.ExitIsSetup)
report("FrameDestroy is not after a FrameSetup", &I);
int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index c67a25b888bf..db2264b2439d 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -34,7 +34,7 @@
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "phielim"
+#define DEBUG_TYPE "phi-node-elimination"
static cl::opt<bool>
DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index bf44ee8453b6..1803ea2b9249 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -3214,7 +3214,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
CurrList(WorkList.begin() + PrevSize, WorkList.end());
if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
- (MachineInstr*)nullptr), WorkList.end());
+ nullptr), WorkList.end());
}
void RegisterCoalescer::coalesceLocals() {
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 35db30f89976..0635e5c0a63c 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -62,10 +62,9 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
}
this->MBB = &MBB;
- for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
- IE = Scavenged.end(); I != IE; ++I) {
- I->Reg = 0;
- I->Restore = nullptr;
+ for (ScavengedInfo &SI : Scavenged) {
+ SI.Reg = 0;
+ SI.Restore = nullptr;
}
Tracking = false;
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 7fa379d80c6c..08b3d345f689 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -19,6 +19,7 @@
#include "SafeStackLayout.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -92,11 +93,11 @@ public:
/// determined statically), and the unsafe stack, which contains all
/// local variables that are accessed in ways that we can't prove to
/// be safe.
-class SafeStack : public FunctionPass {
- const TargetMachine *TM;
- const TargetLoweringBase *TL;
- const DataLayout *DL;
- ScalarEvolution *SE;
+class SafeStack {
+ Function &F;
+ const TargetLoweringBase &TL;
+ const DataLayout &DL;
+ ScalarEvolution &SE;
Type *StackPtrTy;
Type *IntPtrTy;
@@ -171,33 +172,21 @@ class SafeStack : public FunctionPass {
uint64_t AllocaSize);
public:
- static char ID; // Pass identification, replacement for typeid.
- SafeStack(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
- initializeSafeStackPass(*PassRegistry::getPassRegistry());
- }
- SafeStack() : SafeStack(nullptr) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- }
-
- bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
-
- StackPtrTy = Type::getInt8PtrTy(M.getContext());
- IntPtrTy = DL->getIntPtrType(M.getContext());
- Int32Ty = Type::getInt32Ty(M.getContext());
- Int8Ty = Type::getInt8Ty(M.getContext());
-
- return false;
- }
-
- bool runOnFunction(Function &F) override;
-}; // class SafeStack
+ SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL,
+ ScalarEvolution &SE)
+ : F(F), TL(TL), DL(DL), SE(SE),
+ StackPtrTy(Type::getInt8PtrTy(F.getContext())),
+ IntPtrTy(DL.getIntPtrType(F.getContext())),
+ Int32Ty(Type::getInt32Ty(F.getContext())),
+ Int8Ty(Type::getInt8Ty(F.getContext())) {}
+
+ // Run the transformation on the associated function.
+ // Returns whether the function was changed.
+ bool run();
+};
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
- uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
auto C = dyn_cast<ConstantInt>(AI->getArraySize());
if (!C)
@@ -209,11 +198,11 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
const Value *AllocaPtr, uint64_t AllocaSize) {
- AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
- const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
- uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
- ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+ uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType());
+ ConstantRange AccessStartRange = SE.getUnsignedRange(Expr);
ConstantRange SizeRange =
ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
ConstantRange AccessRange = AccessStartRange.add(SizeRange);
@@ -226,8 +215,8 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
<< *AllocaPtr << "\n"
<< " Access " << *Addr << "\n"
<< " SCEV " << *Expr
- << " U: " << SE->getUnsignedRange(Expr)
- << ", S: " << SE->getSignedRange(Expr) << "\n"
+ << " U: " << SE.getUnsignedRange(Expr)
+ << ", S: " << SE.getSignedRange(Expr) << "\n"
<< " Range " << AccessRange << "\n"
<< " AllocaRange " << AllocaRange << "\n"
<< " " << (Safe ? "safe" : "unsafe") << "\n");
@@ -266,7 +255,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
switch (I->getOpcode()) {
case Instruction::Load: {
- if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr,
AllocaSize))
return false;
break;
@@ -282,7 +271,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
return false;
}
- if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+ if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getOperand(0)->getType()),
AllocaPtr, AllocaSize))
return false;
break;
@@ -343,7 +332,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
}
Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
- Value *StackGuardVar = TL->getIRStackGuard(IRB);
+ Value *StackGuardVar = TL.getIRStackGuard(IRB);
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
@@ -390,7 +379,7 @@ void SafeStack::findInsts(Function &F,
if (!Arg.hasByValAttr())
continue;
uint64_t Size =
- DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+ DL.getTypeStoreSize(Arg.getType()->getPointerElementType());
if (IsSafeStackAlloca(&Arg, Size))
continue;
@@ -476,19 +465,19 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
unsigned Align =
- std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+ std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
}
for (Argument *Arg : ByValArguments) {
Type *Ty = Arg->getType()->getPointerElementType();
- uint64_t Size = DL->getTypeStoreSize(Ty);
+ uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+ unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty),
Arg->getParamAlignment());
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -501,7 +490,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
// Ensure the object is properly aligned.
unsigned Align =
- std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
}
@@ -539,7 +528,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
unsigned Offset = SSL.getObjectOffset(Arg);
Type *Ty = Arg->getType()->getPointerElementType();
- uint64_t Size = DL->getTypeStoreSize(Ty);
+ uint64_t Size = DL.getTypeStoreSize(Ty);
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
@@ -630,7 +619,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
Type *Ty = AI->getAllocatedType();
- uint64_t TySize = DL->getTypeAllocSize(Ty);
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
@@ -638,7 +627,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
unsigned Align = std::max(
- std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+ std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
(unsigned)StackAlignment);
assert(isPowerOf2_32(Align));
@@ -685,25 +674,10 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
}
}
-bool SafeStack::runOnFunction(Function &F) {
- DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
-
- if (!F.hasFnAttribute(Attribute::SafeStack)) {
- DEBUG(dbgs() << "[SafeStack] safestack is not requested"
- " for this function\n");
- return false;
- }
-
- if (F.isDeclaration()) {
- DEBUG(dbgs() << "[SafeStack] function definition"
- " is not available\n");
- return false;
- }
-
- if (!TM)
- report_fatal_error("Target machine is required");
- TL = TM->getSubtargetImpl(F)->getTargetLowering();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+bool SafeStack::run() {
+ assert(F.hasFnAttribute(Attribute::SafeStack) &&
+ "Can't run SafeStack on a function without the attribute");
+ assert(!F.isDeclaration() && "Can't run SafeStack on a function declaration");
++NumFunctions;
@@ -736,7 +710,7 @@ bool SafeStack::runOnFunction(Function &F) {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
- UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB);
+ UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB);
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
@@ -788,14 +762,70 @@ bool SafeStack::runOnFunction(Function &F) {
return true;
}
+class SafeStackLegacyPass : public FunctionPass {
+ const TargetMachine *TM;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ SafeStackLegacyPass(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {
+ initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ SafeStackLegacyPass() : SafeStackLegacyPass(nullptr) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+ if (!F.hasFnAttribute(Attribute::SafeStack)) {
+ DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+ " for this function\n");
+ return false;
+ }
+
+ if (F.isDeclaration()) {
+ DEBUG(dbgs() << "[SafeStack] function definition"
+ " is not available\n");
+ return false;
+ }
+
+ if (!TM)
+ report_fatal_error("Target machine is required");
+ auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (!TL)
+ report_fatal_error("TargetLowering instance is required");
+
+ auto *DL = &F.getParent()->getDataLayout();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ // Compute DT and LI only for functions that have the attribute.
+ // This is only useful because the legacy pass manager doesn't let us
+ // compute analyses lazily.
+ // In the backend pipeline, nothing preserves DT before SafeStack, so we
+ // would otherwise always compute it wastefully, even if there is no
+ // function with the safestack attribute.
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+
+ ScalarEvolution SE(F, TLI, ACT, DT, LI);
+
+ return SafeStack(F, *TL, *DL, SE).run();
+ }
+};
+
} // anonymous namespace
-char SafeStack::ID = 0;
-INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack",
+char SafeStackLegacyPass::ID = 0;
+INITIALIZE_TM_PASS_BEGIN(SafeStackLegacyPass, "safe-stack",
"Safe Stack instrumentation pass", false, false)
-INITIALIZE_TM_PASS_END(SafeStack, "safe-stack",
+INITIALIZE_TM_PASS_END(SafeStackLegacyPass, "safe-stack",
"Safe Stack instrumentation pass", false, false)
FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) {
- return new SafeStack(TM);
+ return new SafeStackLegacyPass(TM);
}
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
new file mode 100644
index 000000000000..dab5b91f50ad
--- /dev/null
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -0,0 +1,660 @@
+//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===//
+//=== intrinsics ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces masked memory intrinsics - when unsupported by the target
+// - with a chain of basic blocks that deal with the elements one by one if the
+// appropriate mask bit is set.
+//
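+// For illustration (an approximate example): on a target where
+// TTI->isLegalMaskedLoad() reports the vector type as unsupported, a call like
+//
+//   %r = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %p,
+//                             i32 4, <8 x i1> %m, <8 x float> %passthru)
+//
+// is rewritten by scalarizeMaskedLoad below into the conditional-load chain
+// shown in the comment preceding that function.
+//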
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "scalarize-masked-mem-intrin"
+
+namespace {
+
+class ScalarizeMaskedMemIntrin : public FunctionPass {
+ const TargetTransformInfo *TTI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) {
+ initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ return "Scalarize Masked Memory Intrinsics";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+private:
+ bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
+ bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+};
+} // namespace
+
+char ScalarizeMaskedMemIntrin::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
+INITIALIZE_PASS_END(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin",
+ "Scalarize unsupported masked memory intrinsics", false,
+ false)
+
+FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() {
+ return new ScalarizeMaskedMemIntrin();
+}
+
+// Translate a masked load intrinsic like
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask, <16 x i32> %passthru)
+// to a chain of basic blocks, loading the elements one by one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.load, label %else
+//
+// cond.load: ; preds = %0
+// %4 = getelementptr i32* %1, i32 0
+// %5 = load i32* %4
+// %6 = insertelement <16 x i32> undef, i32 %5, i32 0
+// br label %else
+//
+// else: ; preds = %0, %cond.load
+// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ]
+// %7 = extractelement <16 x i1> %mask, i32 1
+// %8 = icmp eq i1 %7, true
+// br i1 %8, label %cond.load1, label %else2
+//
+// cond.load1: ; preds = %else
+// %9 = getelementptr i32* %1, i32 1
+// %10 = load i32* %9
+// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1
+// br label %else2
+//
+// else2: ; preds = %else, %cond.load1
+// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+// %12 = extractelement <16 x i1> %mask, i32 2
+// %13 = icmp eq i1 %12, true
+// br i1 %13, label %cond.load4, label %else5
+//
+static void scalarizeMaskedLoad(CallInst *CI) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ Type *EltTy = CI->getType()->getVectorElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask =
+ isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_load = icmp eq i1 %mask_1, true
+ // br i1 %to_load, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// Translate a masked store intrinsic, like
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// <16 x i1> %mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set
+//
+// %1 = bitcast i8* %addr to i32*
+// %2 = extractelement <16 x i1> %mask, i32 0
+// %3 = icmp eq i1 %2, true
+// br i1 %3, label %cond.store, label %else
+//
+// cond.store: ; preds = %0
+// %4 = extractelement <16 x i32> %val, i32 0
+// %5 = getelementptr i32* %1, i32 0
+// store i32 %4, i32* %5
+// br label %else
+//
+// else: ; preds = %0, %cond.store
+// %6 = extractelement <16 x i1> %mask, i32 1
+// %7 = icmp eq i1 %6, true
+// br i1 %7, label %cond.store1, label %else2
+//
+// cond.store1: ; preds = %else
+// %8 = extractelement <16 x i32> %val, i32 1
+// %9 = getelementptr i32* %1, i32 1
+// store i32 %8, i32* %9
+// br label %else2
+// . . .
+static void scalarizeMaskedStore(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ VectorType *VecType = dyn_cast<VectorType>(Src->getType());
+ assert(VecType && "Unexpected data type in masked store intrinsic");
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ // Short-cut if the mask is all-true.
+ bool IsAllOnesMask =
+ isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue();
+
+ if (IsAllOnesMask) {
+ Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // Adjust alignment for the scalar instruction.
+ AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8);
+ // Bitcast %addr from i8* to EltTy*
+ Type *NewPtrType =
+ EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+ unsigned VectorWidth = VecType->getNumElements();
+
+ if (isa<ConstantVector>(Mask)) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %to_store = icmp eq i1 %mask_1, true
+ // br i1 %to_store, label %cond.store, label %else
+ //
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1));
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
+ Value *Gep =
+ Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+// Translate a masked gather intrinsic like
+// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
+// <16 x i1> %Mask, <16 x i32> %Src)
+// to a chain of basic blocks, loading the elements one by one if
+// the appropriate mask bit is set
+//
+// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> %Mask, i32 0
+// % ToLoad0 = icmp eq i1 % Mask0, true
+// br i1 % ToLoad0, label %cond.load, label %else
+//
+// cond.load:
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// % Load0 = load i32, i32* % Ptr0, align 4
+// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
+// br label %else
+//
+// else:
+// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
+// % Mask1 = extractelement <16 x i1> %Mask, i32 1
+// % ToLoad1 = icmp eq i1 % Mask1, true
+// br i1 % ToLoad1, label %cond.load1, label %else2
+//
+// cond.load1:
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// % Load1 = load i32, i32* % Ptr1, align 4
+// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1
+// br label %else2
+// . . .
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
+// ret <16 x i32> %Result
+static void scalarizeMaskedGather(CallInst *CI) {
+ Value *Ptrs = CI->getArgOperand(0);
+ Value *Alignment = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Value *Src0 = CI->getArgOperand(3);
+
+ VectorType *VecType = dyn_cast<VectorType>(CI->getType());
+
+ assert(VecType && "Unexpected return type of masked load intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ BasicBlock *CondBlock = nullptr;
+ BasicBlock *PrevIfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ Value *UndefVal = UndefValue::get(VecType);
+
+ // The result vector
+ Value *VResult = UndefVal;
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // Shortcut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(
+ VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
+ }
+ Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
+ PHINode *Phi = nullptr;
+ Value *PrevPhi = UndefVal;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %ToLoad1 = icmp eq i1 %Mask1, true
+ // br i1 %ToLoad1, label %cond.load, label %else
+ //
+ if (Idx > 0) {
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ PrevPhi = Phi;
+ VResult = Phi;
+ }
+
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToLoad" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx),
+ "Res" + Twine(Idx));
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+ }
+
+ Phi = Builder.CreatePHI(VecType, 2, "res.phi.select");
+ Phi->addIncoming(VResult, CondBlock);
+ Phi->addIncoming(PrevPhi, PrevIfBlock);
+ Value *NewI = Builder.CreateSelect(Mask, Phi, Src0);
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+}
+
+// Translate a masked scatter intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+// <16 x i1> %Mask)
+// to a chain of basic blocks that store the elements one by one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+// . . .
+static void scalarizeMaskedScatter(CallInst *CI) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptrs = CI->getArgOperand(1);
+ Value *Alignment = CI->getArgOperand(2);
+ Value *Mask = CI->getArgOperand(3);
+
+ assert(isa<VectorType>(Src->getType()) &&
+ "Unexpected data type in masked scatter intrinsic");
+ assert(isa<VectorType>(Ptrs->getType()) &&
+ isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+ "Vector of pointers is expected in masked scatter intrinsic");
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+ unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+ // Shortcut if the mask is a vector of constants.
+ bool IsConstMask = isa<ConstantVector>(Mask);
+
+ if (IsConstMask) {
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+ continue;
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+ }
+ CI->eraseFromParent();
+ return;
+ }
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
+ // % ToStore = icmp eq i1 % Mask1, true
+ // br i1 % ToStore, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
+ "Mask" + Twine(Idx));
+ Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
+ ConstantInt::get(Predicate->getType(), 1),
+ "ToStore" + Twine(Idx));
+
+ // Create "cond" block
+ //
+ // % Elt1 = extractelement <16 x i32> %Src, i32 1
+ // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+ // %store i32 % Elt1, i32* % Ptr1
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
+ "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
+ "Ptr" + Twine(Idx));
+ Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+ OldBr->eraseFromParent();
+ IfBlock = NewIfBlock;
+ }
+ CI->eraseFromParent();
+}
+
+bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ bool EverMadeChange = false;
+
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ bool MadeChange = true;
+ while (MadeChange) {
+ MadeChange = false;
+ for (Function::iterator I = F.begin(); I != F.end();) {
+ BasicBlock *BB = &*I++;
+ bool ModifiedDTOnIteration = false;
+ MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+
+ // Restart BB iteration if the dominator tree of the Function was changed
+ if (ModifiedDTOnIteration)
+ break;
+ }
+
+ EverMadeChange |= MadeChange;
+ }
+
+ return EverMadeChange;
+}
+
+bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+ bool MadeChange = false;
+
+ BasicBlock::iterator CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
+ MadeChange |= optimizeCallInst(CI, ModifiedDT);
+ if (ModifiedDT)
+ return true;
+ }
+
+ return MadeChange;
+}
+
+bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
+ bool &ModifiedDT) {
+
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::masked_load: {
+ // Scalarize unsupported vector masked load
+ if (!TTI->isLegalMaskedLoad(CI->getType())) {
+ scalarizeMaskedLoad(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_store: {
+ if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedStore(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_gather: {
+ if (!TTI->isLegalMaskedGather(CI->getType())) {
+ scalarizeMaskedGather(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ case Intrinsic::masked_scatter: {
+ if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
+ scalarizeMaskedScatter(CI);
+ ModifiedDT = true;
+ return true;
+ }
+ return false;
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c77046fdfaf5..caf5cb497a71 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -114,7 +114,7 @@ namespace {
SmallPtrSet<SDNode *, 32> CombinedNodes;
// AA - Used for DAG load/store alias analysis.
- AliasAnalysis &AA;
+ AliasAnalysis *AA;
/// When an instruction is simplified, add all users of the instruction to
/// the work lists because they might get more simplified now.
@@ -496,9 +496,9 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
+ OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
MaximumLegalStoreInBits = 0;
@@ -1729,10 +1729,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
NumLeftToConsider--;
}
- SDValue Result;
-
// If we've changed things around then replace token factor.
if (Changed) {
+ SDValue Result;
if (Ops.empty()) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
@@ -1749,13 +1748,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
}
-
- // Add users to worklist, since we may introduce a lot of new
- // chained token factors while removing memory deps.
- return CombineTo(N, Result, true /*add to worklist*/);
+ return Result;
}
-
- return Result;
+ return SDValue();
}
/// MERGE_VALUES can always be eliminated.
@@ -2131,17 +2126,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
// canonicalize constant to RHS
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
- N1, N0, CarryIn);
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
// fold (addcarry x, y, false) -> (uaddo x, y)
if (isNullConstant(CarryIn))
- return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N0, N1);
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -5313,17 +5308,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
- // If the target supports masking y in (shl, y),
- // fold (shl x, (and y, ((1 << numbits(x)) - 1))) -> (shl x, y)
- if (TLI.isOperationLegal(ISD::SHL, VT) &&
- TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
- if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
- if (Mask->getZExtValue() == OpSizeInBits - 1) {
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
- }
- }
- }
-
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
@@ -5331,7 +5315,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
- if (isNullConstant(N0))
+ if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
@@ -5522,18 +5506,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
- // If the target supports masking y in (sra, y),
- // fold (sra x, (and y, ((1 << numbits(x)) - 1))) -> (sra x, y)
- if (TLI.isOperationLegal(ISD::SRA, VT) &&
- TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
- if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
- if (Mask->getZExtValue() == OpSizeInBits - 1) {
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
- }
- }
- }
-
// Arithmetic shifting an all-sign-bit value is a no-op.
+ // fold (sra 0, x) -> 0
+ // fold (sra -1, x) -> -1
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
@@ -5548,12 +5523,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
- // fold (sra 0, x) -> 0
- if (isNullConstant(N0))
- return N0;
- // fold (sra -1, x) -> -1
- if (isAllOnesConstant(N0))
- return N0;
// fold (sra x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
@@ -5691,17 +5660,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
- // If the target supports masking y in (srl, y),
- // fold (srl x, (and y, ((1 << numbits(x)) - 1))) -> (srl x, y)
- if (TLI.isOperationLegal(ISD::SRL, VT) &&
- TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
- if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
- if (Mask->getZExtValue() == OpSizeInBits - 1) {
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
- }
- }
- }
-
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -5714,7 +5672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
- if (isNullConstant(N0))
+ if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
@@ -7365,14 +7323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueSizeInBits(),
std::min(Op.getValueSizeInBits(),
VT.getSizeInBits()));
- if (TruncatedBits.isSubsetOf(Known.Zero)) {
- if (VT.bitsGT(Op.getValueType()))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
- if (VT.bitsLT(Op.getValueType()))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
-
- return Op;
- }
+ if (TruncatedBits.isSubsetOf(Known.Zero))
+ return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
// fold (zext (truncate (load x))) -> (zext (smaller load x))
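
The simplification above folds the explicit bitsGT/bitsLT branching into DAG.getZExtOrTrunc(). As a rough mental model (a sketch on plain integers, not the SelectionDAG API), that helper behaves like this:

#include <cassert>
#include <cstdint>

// Plain-integer sketch: zero-extend when the destination is wider, truncate
// when it is narrower, identity when the widths match.
static uint64_t zextOrTrunc(uint64_t V, unsigned SrcBits, unsigned DstBits) {
  // Normalize the source to its declared width first.
  if (SrcBits < 64)
    V &= (1ULL << SrcBits) - 1;
  if (DstBits >= SrcBits)
    return V;                              // zero-extension keeps the value
  return V & ((1ULL << DstBits) - 1);      // truncation drops the high bits
}

int main() {
  assert(zextOrTrunc(0xAB, 8, 32) == 0xAB);   // widen
  assert(zextOrTrunc(0x1234, 16, 8) == 0x34); // narrow
  assert(zextOrTrunc(0x7F, 8, 8) == 0x7F);    // same width
  return 0;
}

getAnyExtOrTrunc(), used in the later hunks, has the same shape; only the extension kind differs.
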
@@ -7419,14 +7371,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
- SDValue Op = N0.getOperand(0);
- if (SrcVT.bitsLT(VT)) {
- Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- } else if (SrcVT.bitsGT(VT)) {
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
- AddToWorklist(Op.getNode());
- }
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ AddToWorklist(Op.getNode());
return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
}
}
@@ -7440,11 +7386,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType()) ||
!TLI.isZExtFree(N0.getValueType(), VT))) {
SDValue X = N0.getOperand(0).getOperand(0);
- if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
- } else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
- }
+ X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
SDLoc DL(N);
@@ -7669,14 +7611,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
// fold (aext (truncate x))
- if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue TruncOp = N0.getOperand(0);
- if (TruncOp.getValueType() == VT)
- return TruncOp; // x iff x size == zext size.
- if (TruncOp.getValueType().bitsGT(VT))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
- return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
- }
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Fold (aext (and (trunc x), cst)) -> (and x, cst)
// if the trunc is not free.
@@ -7687,11 +7623,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getValueType())) {
SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
- if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
- } else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
- }
+ X = DAG.getAnyExtOrTrunc(X, DL, VT);
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
@@ -14868,6 +14800,55 @@ SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
return SDValue();
}
+// Combine shuffles of splat-shuffles of the form:
+// shuffle (shuffle V, undef, splat-mask), undef, M
+// If splat-mask contains undef elements, we need to be careful about
+// introducing undef's in the folded mask which are not the result of composing
+// the masks of the shuffles.
+static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
+ ShuffleVectorSDNode *Splat,
+ SelectionDAG &DAG) {
+ ArrayRef<int> SplatMask = Splat->getMask();
+ assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+
+ // Prefer simplifying to the splat-shuffle, if possible. This is legal if
+ // every undef mask element in the splat-shuffle has a corresponding undef
+ // element in the user-shuffle's mask or if the composition of mask elements
+ // would result in undef.
+ // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
+ // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
+ // In this case it is not legal to simplify to the splat-shuffle because we
+// may be exposing to the users of the shuffle an undef element at index 1
+// that was not there before the combine.
+ // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
+ // In this case the composition of masks yields SplatMask, so it's ok to
+ // simplify to the splat-shuffle.
+ // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
+ // In this case the composed mask includes all undef elements of SplatMask
+ // and in addition sets element zero to undef. It is safe to simplify to
+ // the splat-shuffle.
+ auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
+ ArrayRef<int> SplatMask) {
+ for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
+ if (UserMask[i] != -1 && SplatMask[i] == -1 &&
+ SplatMask[UserMask[i]] != -1)
+ return false;
+ return true;
+ };
+ if (CanSimplifyToExistingSplat(UserMask, SplatMask))
+ return SDValue(Splat, 0);
+
+ // Create a new shuffle with a mask that is composed of the two shuffles'
+ // masks.
+ SmallVector<int, 32> NewMask;
+ for (int Idx : UserMask)
+ NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
+
+ return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
+ Splat->getOperand(0), Splat->getOperand(1),
+ NewMask);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -14914,6 +14895,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
+ // A shuffle of a single vector that is a splat can always be folded.
+ if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
+ if (N1->isUndef() && N0Shuf->isSplat())
+ return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
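
To make the mask bookkeeping in combineShuffleOfSplat above easier to follow, here is a self-contained sketch of the composition rule it applies when it cannot simply return the existing splat-shuffle (plain C++, not DAG code), checked against the first example in the comment:

#include <cassert>
#include <vector>

// Sketch of the composition: NewMask[i] = UserMask[i] == -1 ? -1
//                                                           : SplatMask[UserMask[i]].
// -1 denotes an undef lane, as in shuffle masks.
static std::vector<int> composeMasks(const std::vector<int> &UserMask,
                                     const std::vector<int> &SplatMask) {
  std::vector<int> NewMask;
  NewMask.reserve(UserMask.size());
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
  return NewMask;
}

int main() {
  // First example from the comment above:
  // UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u].
  std::vector<int> Composed = composeMasks({0, 2, -1, -1}, {2, -1, 2, -1});
  assert((Composed == std::vector<int>{2, 2, -1, -1}));
  return 0;
}
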
@@ -16381,17 +16367,17 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA &&
+ if (UseAA && AA &&
Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
AliasResult AAResult =
- AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+ AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
}
@@ -16605,7 +16591,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
}
/// This is the entry point for the file.
-void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
CodeGenOpt::Level OptLevel) {
/// This is the main entry point to this class.
DAGCombiner(*this, AA, OptLevel).Run(Level);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8c98e3740f6d..5003b79974eb 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -622,7 +622,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
// have to worry about calling conventions and target-specific lowering code.
// Instead we perform the call lowering right here.
//
- // CALLSEQ_START(0...)
+ // CALLSEQ_START(0, 0...)
// STACKMAP(id, nbytes, ...)
// CALLSEQ_END(0, 0)
//
@@ -1150,16 +1150,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- unsigned Offset = 0;
+ // Byval arguments with frame indices were already handled after argument
+ // lowering and before isel.
+ const auto *Arg =
+ dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
+ if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ return true;
+
Optional<MachineOperand> Op;
- if (const auto *Arg = dyn_cast<Argument>(Address))
- // Some arguments' frame index is recorded during argument lowering.
- Offset = FuncInfo.getArgumentFrameIndex(Arg);
- if (Offset)
- Op = MachineOperand::CreateFI(Offset);
- if (!Op)
- if (unsigned Reg = lookUpRegForValue(Address))
- Op = MachineOperand::CreateReg(Reg, false);
+ if (unsigned Reg = lookUpRegForValue(Address))
+ Op = MachineOperand::CreateReg(Reg, false);
// If we have a VLA that has a "use" in a metadata node that's then used
// here but it has no other uses, then we have a problem. E.g.,
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cdf4d3a8b4e5..606b8952f3c1 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF = &mf;
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- MachineModuleInfo &MMI = MF->getMMI();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
@@ -214,33 +213,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
InitializeRegForValue(&I);
- // Collect llvm.dbg.declare information. This is done now instead of
- // during the initial isel pass through the IR so that it is done
- // in a predictable order.
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
- assert(DI->getVariable() && "Missing variable");
- assert(DI->getDebugLoc() && "Missing location");
- if (MMI.hasDebugInfo()) {
- // Don't handle byval struct arguments or VLAs, for example.
- // Non-byval arguments are handled here (they refer to the stack
- // temporary alloca at this point).
- const Value *Address = DI->getAddress();
- if (Address) {
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
- DenseMap<const AllocaInst *, int>::iterator SI =
- StaticAllocaMap.find(AI);
- if (SI != StaticAllocaMap.end()) { // Check for VLAs.
- int FI = SI->second;
- MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
- FI, DI->getDebugLoc());
- }
- }
- }
- }
- }
-
// Decide the preferred extend type for a value.
PreferredExtendType[&I] = getPreferredExtendForValue(&I);
}
@@ -510,12 +482,11 @@ void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
/// If the argument does not have any assigned frame index then INT_MAX is
/// returned.
int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
- DenseMap<const Argument *, int>::iterator I =
- ByValArgFrameIndexMap.find(A);
+ auto I = ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
- return 0;
+ return INT_MAX;
}
unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2654b3ad7a62..9a47a914df91 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1493,7 +1493,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
@@ -4187,6 +4187,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplacedNode(Node);
break;
}
+ case ISD::MUL:
case ISD::SDIV:
case ISD::SREM:
case ISD::UDIV:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index cde4331cc42d..4c3b514856b7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -675,6 +675,7 @@ private:
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
SDValue SplitVecOp_TruncateHelper(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 97a7fab6efd0..ff0e609803d8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1513,6 +1513,22 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND_VECTOR_INREG:
Res = SplitVecOp_ExtVecInRegOp(N);
break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = SplitVecOp_VECREDUCE(N, OpNo);
+ break;
}
}
@@ -1565,6 +1581,48 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
}
+SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+
+ SDValue VecOp = N->getOperand(OpNo);
+ EVT VecVT = VecOp.getValueType();
+ assert(VecVT.isVector() && "Can only split reduce vector operand");
+ GetSplitVector(VecOp, Lo, Hi);
+ EVT LoOpVT, HiOpVT;
+ std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
+
+ bool NoNaN = N->getFlags().hasNoNaNs();
+ unsigned CombineOpc = 0;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break;
+ case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN;
+ break;
+ default:
+ llvm_unreachable("Unexpected reduce ISD node");
+ }
+
+ // Use the appropriate scalar instruction on the split subvectors before
+ // reducing the now partially reduced smaller vector.
+ SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi);
+ return DAG.getNode(N->getOpcode(), dl, ResVT, Partial);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
// The result has a legal vector type, but the input needs splitting.
EVT ResVT = N->getValueType(0);
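
SplitVecOp_VECREDUCE above relies on the identity that reducing a wide vector equals combining its two halves element-wise with the scalar opcode and then reducing the half-width partial result. A scalar sanity check of that identity for an integer add-reduction (a standalone sketch, independent of LLVM; the floating-point opcodes additionally rely on reassociation or no-NaNs flags):

#include <array>
#include <cassert>
#include <numeric>

int main() {
  std::array<int, 8> V = {1, 2, 3, 4, 5, 6, 7, 8};

  // Direct reduction of the full vector.
  int Full = std::accumulate(V.begin(), V.end(), 0);

  // Split into Lo/Hi halves, combine with the scalar opcode (ADD), then
  // reduce the partial result -- the shape produced by SplitVecOp_VECREDUCE.
  std::array<int, 4> Partial;
  for (int I = 0; I != 4; ++I)
    Partial[I] = V[I] + V[I + 4];
  int Split = std::accumulate(Partial.begin(), Partial.end(), 0);

  assert(Full == Split);
  return 0;
}
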
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d605a1dc1c20..057badcd6b74 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2217,10 +2217,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- unsigned TrailZ = Known.Zero.countTrailingOnes() +
- Known2.Zero.countTrailingOnes();
- unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() +
- Known2.Zero.countLeadingOnes(),
+ unsigned TrailZ = Known.countMinTrailingZeros() +
+ Known2.countMinTrailingZeros();
+ unsigned LeadZ = std::max(Known.countMinLeadingZeros() +
+ Known2.countMinLeadingZeros(),
BitWidth) - BitWidth;
Known.resetAll();
@@ -2233,13 +2233,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
- unsigned LeadZ = Known2.Zero.countLeadingOnes();
+ unsigned LeadZ = Known2.countMinLeadingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
- unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros();
- if (RHSUnknownLeadingOnes != BitWidth)
- LeadZ = std::min(BitWidth,
- LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+ unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
+ if (RHSMaxLeadingZeros != BitWidth)
+ LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
Known.Zero.setHighBits(LeadZ);
break;
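
A worked instance of the udiv bound above may help: if the divisor's maximum possible leading-zero count leaves K significant bits below the top (i.e. the divisor is at least 2^K), the quotient gains at least K leading zeros on top of the numerator's, because dividing by a value of at least 2^K shifts right by at least K. A small check of that reasoning (a sketch, unrelated to the KnownBits API):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Num = 0x00FFFFFFu; // numerator with 8 leading zeros
  uint32_t Den = 16;          // divisor known to be >= 2^4
  uint32_t Quot = Num / Den;

  // Count leading zeros of the quotient the portable way.
  unsigned LeadZ = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(Quot & Bit); Bit >>= 1)
    ++LeadZ;

  assert(LeadZ >= 8 + 4); // at least the numerator's 8 plus K = 4 more
  return 0;
}
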
@@ -2359,7 +2358,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
case ISD::CTTZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
- unsigned PossibleTZ = Known2.One.countTrailingZeros();
+ unsigned PossibleTZ = Known2.countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
@@ -2368,7 +2367,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
case ISD::CTLZ_ZERO_UNDEF: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we have a known 1, its position is our upper bound.
- unsigned PossibleLZ = Known2.One.countLeadingZeros();
+ unsigned PossibleLZ = Known2.countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
@@ -2376,7 +2375,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
case ISD::CTPOP: {
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
// If we know some of the bits are zero, they can't be one.
- unsigned PossibleOnes = BitWidth - Known2.Zero.countPopulation();
+ unsigned PossibleOnes = Known2.countMaxPopulation();
Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
break;
}
@@ -2493,13 +2492,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// going to be 0 in the result. Both addition and complement operations
// preserve the low zero bits.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
- unsigned KnownZeroLow = Known2.Zero.countTrailingOnes();
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
if (KnownZeroLow == 0)
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
- KnownZeroLow = std::min(KnownZeroLow,
- Known2.Zero.countTrailingOnes());
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
Known.Zero.setLowBits(KnownZeroLow);
break;
}
@@ -2526,15 +2524,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1);
- unsigned KnownZeroHigh = Known2.Zero.countLeadingOnes();
- unsigned KnownZeroLow = Known2.Zero.countTrailingOnes();
+ unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
computeKnownBits(Op.getOperand(1), Known2, DemandedElts,
Depth + 1);
- KnownZeroHigh = std::min(KnownZeroHigh,
- Known2.Zero.countLeadingOnes());
- KnownZeroLow = std::min(KnownZeroLow,
- Known2.Zero.countTrailingOnes());
+ KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
// With ADDE and ADDCARRY, a carry bit may be added in, so we can only
@@ -2594,8 +2590,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
- uint32_t Leaders = std::max(Known.Zero.countLeadingOnes(),
- Known2.Zero.countLeadingOnes());
+ uint32_t Leaders =
+ std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
Known.resetAll();
Known.Zero.setHighBits(Leaders);
break;
@@ -2711,8 +2707,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// UMIN - we know that the result will have the maximum of the
// known zero leading bits of the inputs.
- unsigned LeadZero = Known.Zero.countLeadingOnes();
- LeadZero = std::max(LeadZero, Known2.Zero.countLeadingOnes());
+ unsigned LeadZero = Known.countMinLeadingZeros();
+ LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
@@ -2726,8 +2722,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
// UMAX - we know that the result will have the maximum of the
// known one leading bits of the inputs.
- unsigned LeadOne = Known.One.countLeadingOnes();
- LeadOne = std::max(LeadOne, Known2.One.countLeadingOnes());
+ unsigned LeadOne = Known.countMinLeadingOnes();
+ LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes());
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
@@ -2843,8 +2839,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// Fall back to computeKnownBits to catch other known cases.
KnownBits Known;
computeKnownBits(Val, Known);
- return (Known.Zero.countPopulation() == BitWidth - 1) &&
- (Known.One.countPopulation() == 1);
+ return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
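
The rewritten power-of-two check above reads naturally once the two bounds are spelled out: countMaxPopulation() == 1 says at most one bit can possibly be set, countMinPopulation() == 1 says at least one bit is known set, so together the value has exactly one set bit. A toy model of those bounds (not the llvm::KnownBits API):

#include <bitset>
#include <cassert>
#include <cstdint>

// Toy model: Zero marks bits known to be 0, One marks bits known to be 1.
struct ToyKnownBits {
  uint32_t Zero = 0, One = 0;
  unsigned countMinPopulation() const { return std::bitset<32>(One).count(); }
  unsigned countMaxPopulation() const { return std::bitset<32>(~Zero).count(); }
};

int main() {
  // A value known to be exactly 0b0100: at most one bit can be set and at
  // least one bit is set, so it has a single set bit, i.e. a power of two.
  ToyKnownBits K;
  K.One = 0b0100;
  K.Zero = ~K.One;
  assert(K.countMaxPopulation() == 1 && K.countMinPopulation() == 1);
  return 0;
}
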
@@ -2860,6 +2855,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2903,6 +2899,39 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
}
return Tmp;
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = SVN->getMaskElt(i);
+ if (!DemandedElts[i])
+ continue;
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M < 0)
+ return 1;
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ Tmp = UINT_MAX;
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
+
case ISD::SIGN_EXTEND:
case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
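
The new VECTOR_SHUFFLE case above takes the minimum sign-bit count over the source elements the mask actually references. For intuition, here is a scalar sketch of what "number of sign bits" means for one 8-bit element and why the minimum is the right way to combine elements (plain C++, not the DAG API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of leading bits that are all copies of the sign bit -- the
// per-element quantity ComputeNumSignBits reports.
static unsigned numSignBits8(int8_t V) {
  uint8_t U = static_cast<uint8_t>(V);
  unsigned Sign = (U >> 7) & 1;
  unsigned Bits = 1;
  while (Bits < 8 && ((U >> (7 - Bits)) & 1) == Sign)
    ++Bits;
  return Bits;
}

int main() {
  int8_t A = -1; // 0b11111111 -> 8 sign bits
  int8_t B = 3;  // 0b00000011 -> 6 sign bits
  assert(numSignBits8(A) == 8 && numSignBits8(B) == 6);
  // A shuffle result that may hold either element can only guarantee the
  // smaller count, hence the std::min over the referenced elements.
  assert(std::min(numSignBits8(A), numSignBits8(B)) == 6);
  return 0;
}
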
@@ -3142,14 +3171,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
- case ISD::EXTRACT_SUBVECTOR:
- return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If we know the element index, just demand those subvector elements;
+ // otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ }
+ return ComputeNumSignBits(Src, Depth + 1);
+ }
case ISD::CONCAT_VECTORS:
- // Determine the minimum number of sign bits across all input vectors.
- // Early out if the result is already 1.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i)
- Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1));
+ // Determine the minimum number of sign bits across all demanded
+ // elts of the input vectors. Early out if the result is already 1.
+ Tmp = UINT_MAX;
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!DemandedSub)
+ continue;
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
return Tmp;
}
@@ -3543,7 +3594,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -3559,8 +3610,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsLT(VT) &&
"Invalid zext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
- return getNode(ISD::ZERO_EXTEND, DL, VT,
- Operand.getNode()->getOperand(0));
+ return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
@@ -3579,13 +3629,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
else if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
- SDValue OpOp = Operand.getNode()->getOperand(0);
+ SDValue OpOp = Operand.getOperand(0);
if (OpOp.getValueType() == VT)
return OpOp;
}
@@ -3601,16 +3651,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Operand.getValueType().bitsGT(VT) &&
"Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
- if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ if (Operand.getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- return Operand.getNode()->getOperand(0);
+ return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ if (Operand.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ return Operand.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
@@ -3665,15 +3715,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
// FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags?
- return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
- Operand.getNode()->getOperand(0),
- Operand.getNode()->getFlags());
+ return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
+ Operand.getOperand(0), Operand.getNode()->getFlags());
if (OpOpcode == ISD::FNEG) // --X -> X
- return Operand.getNode()->getOperand(0);
+ return Operand.getOperand(0);
break;
case ISD::FABS:
if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
- return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+ return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
break;
}
@@ -5970,7 +6019,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
- case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 1: return getNode(Opcode, DL, VT, Ops[0], Flags);
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
@@ -7520,9 +7569,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
KnownBits Known(PtrWidth);
- llvm::computeKnownBits(const_cast<GlobalValue *>(GV), Known,
- getDataLayout());
- unsigned AlignBits = Known.Zero.countTrailingOnes();
+ llvm::computeKnownBits(GV, Known, getDataLayout());
+ unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
return MinAlign(Align, GVOffset);
@@ -7621,7 +7669,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
return false;
// FIXME: The widths are based on this node's type, but build vectors can
- // truncate their operands.
+ // truncate their operands.
SplatValue = APInt(VecWidth, 0);
SplatUndef = APInt(VecWidth, 0);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 50313e2da884..57d340c41c39 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -661,7 +661,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
unsigned RegSize = RegisterVT.getSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
- unsigned NumZeroBits = LOI->Known.Zero.countLeadingOnes();
+ unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
@@ -811,9 +811,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
- AA = &aa;
+ AA = aa;
GFI = gfi;
LibInfo = li;
DL = &DAG.getDataLayout();
@@ -3423,7 +3423,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (isVolatile || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(MemoryLocation(
+ else if (AA && AA->pointsToConstantMemory(MemoryLocation(
SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
@@ -3535,8 +3535,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
Type *Ty = I.getType();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- assert(!AA->pointsToConstantMemory(MemoryLocation(
- SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) &&
+ assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
+ SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -3817,7 +3817,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation(
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
@@ -3861,7 +3861,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
bool ConstantMemory = false;
if (UniformBase &&
- AA->pointsToConstantMemory(MemoryLocation(
+ AA && AA->pointsToConstantMemory(MemoryLocation(
BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
@@ -4676,7 +4676,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
- if (int FI = FuncInfo.getArgumentFrameIndex(Arg))
+ int FI = FuncInfo.getArgumentFrameIndex(Arg);
+ if (FI != INT_MAX)
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
@@ -4927,6 +4928,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ // Byval arguments with frame indices were already handled after argument
+ // lowering and before isel.
+ const auto *Arg =
+ dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
+ if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ return nullptr;
+
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
@@ -4957,20 +4965,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
N)) {
- // If variable is pinned by a alloca in dominating bb then
- // use StaticAllocaMap.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
- if (AI->getParent() != DI.getParent()) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI != FuncInfo.StaticAllocaMap.end()) {
- SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second,
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- return nullptr;
- }
- }
- }
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -5651,7 +5645,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(MF.getName()), Idx);
+ GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
@@ -5672,7 +5666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
- GlobalValue::getRealLinkageName(Fn->getName()), IdxVal);
+ GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
@@ -5737,6 +5731,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return nullptr;
+
+ case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ visitVectorReduce(I, Intrinsic);
+ return nullptr;
+ }
+
}
}
@@ -5982,7 +5994,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
- if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+ if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
@@ -7422,11 +7434,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
- // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
- Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the <id> and <numBytes> constants.
@@ -7616,6 +7628,76 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
+void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
+ unsigned Intrinsic) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2;
+ if (I.getNumArgOperands() > 1)
+ Op2 = getValue(I.getArgOperand(1));
+ SDLoc dl = getCurSDLoc();
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ SDValue Res;
+ FastMathFlags FMF;
+ if (isa<FPMathOperator>(I))
+ FMF = I.getFastMathFlags();
+ SDNodeFlags SDFlags;
+ SDFlags.setNoNaNs(FMF.noNaNs());
+
+ switch (Intrinsic) {
+ case Intrinsic::experimental_vector_reduce_fadd:
+ if (FMF.unsafeAlgebra())
+ Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmul:
+ if (FMF.unsafeAlgebra())
+ Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+ else
+ Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
+ break;
+ case Intrinsic::experimental_vector_reduce_add:
+ Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_mul:
+ Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_and:
+ Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_or:
+ Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_xor:
+ Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smax:
+ Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_smin:
+ Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umax:
+ Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_umin:
+ Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
+ break;
+ case Intrinsic::experimental_vector_reduce_fmax: {
+ Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
+ break;
+ }
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
+ break;
+ }
+ default:
+ llvm_unreachable("Unhandled vector reduce intrinsic");
+ }
+ setValue(&I, Res);
+}
+
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
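
visitVectorReduce above only picks the reassociating VECREDUCE_FADD/FMUL forms when the call carries fast-math flags (unsafeAlgebra); otherwise it emits the strict, ordered variants with the explicit accumulator operand. The reason is simply that floating-point addition is not associative, which a tiny scalar example demonstrates (a sketch, unrelated to the SelectionDAG API; compile without -ffast-math):

#include <cassert>

int main() {
  float V[4] = {1e8f, 1.0f, -1e8f, 1.0f};

  float Ordered = 0.0f;          // strict left-to-right accumulation
  for (float X : V)
    Ordered += X;                // the 1.0f added to 1e8f is rounded away

  float Reassoc = (V[0] + V[2]) + (V[1] + V[3]); // one possible reassociation

  assert(Ordered == 1.0f);       // in-order result
  assert(Reassoc == 2.0f);       // reassociated result differs
  return 0;
}
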
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 9e9989058ae5..bdaee858da61 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -604,11 +604,11 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
: CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), FuncInfo(funcinfo),
+ DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
HasTailCall(false) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
/// Clear out the current SelectionDAG and the associated state and prepare
@@ -909,6 +909,8 @@ private:
void visitGCRelocate(const GCRelocateInst &I);
void visitGCResult(const GCResultInst &I);
+ void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
+
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 26dd45ef933f..c37d7080f2c5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -346,6 +346,19 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETFALSE: return "setfalse";
case ISD::SETFALSE2: return "setfalse2";
}
+ case ISD::VECREDUCE_FADD: return "vecreduce_fadd";
+ case ISD::VECREDUCE_FMUL: return "vecreduce_fmul";
+ case ISD::VECREDUCE_ADD: return "vecreduce_add";
+ case ISD::VECREDUCE_MUL: return "vecreduce_mul";
+ case ISD::VECREDUCE_AND: return "vecreduce_and";
+ case ISD::VECREDUCE_OR: return "vecreduce_or";
+ case ISD::VECREDUCE_XOR: return "vecreduce_xor";
+ case ISD::VECREDUCE_SMAX: return "vecreduce_smax";
+ case ISD::VECREDUCE_SMIN: return "vecreduce_smin";
+ case ISD::VECREDUCE_UMAX: return "vecreduce_umax";
+ case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
+ case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
+ case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3aabdaeaa094..5e0feccb6b4c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -299,7 +300,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
FuncInfo(new FunctionLoweringInfo()),
CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
- GFI(),
+ AA(), GFI(),
OptLevel(OL),
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
@@ -317,7 +318,8 @@ SelectionDAGISel::~SelectionDAGISel() {
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AAResultsWrapperPass>();
+ if (OptLevel != CodeGenOpt::None)
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
AU.addRequired<StackProtector>();
AU.addPreserved<StackProtector>();
@@ -394,7 +396,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
@@ -406,12 +407,22 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
+ // Now get the optional analyses if we want to.
+ // This is based on the possibly changed OptLevel (after optnone is taken
+ // into account). That's unfortunate but OK because it just means we won't
+ // ask for passes that have been required anyway.
+
if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
else
FuncInfo->BPI = nullptr;
- SDB->init(GFI, *AA, LibInfo);
+ if (OptLevel != CodeGenOpt::None)
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ else
+ AA = nullptr;
+
+ SDB->init(GFI, AA, LibInfo);
MF->setHasInlineAsm(false);
@@ -715,7 +726,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -747,7 +758,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
GroupName, GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -781,7 +792,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
GroupName, GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -807,7 +818,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
GroupDescription, TimePassesIsEnabled);
- CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -1145,6 +1156,51 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
}
}
+/// Collect llvm.dbg.declare information. This is done after argument lowering
+/// in case the declarations refer to arguments.
+static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
+ MachineFunction *MF = FuncInfo->MF;
+ const DataLayout &DL = MF->getDataLayout();
+ for (const BasicBlock &BB : *FuncInfo->Fn) {
+ for (const Instruction &I : BB) {
+ const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (!DI)
+ continue;
+
+ assert(DI->getVariable() && "Missing variable");
+ assert(DI->getDebugLoc() && "Missing location");
+ const Value *Address = DI->getAddress();
+ if (!Address)
+ continue;
+
+ // Look through casts and constant offset GEPs. These mostly come from
+ // inalloca.
+ APInt Offset(DL.getPointerSizeInBits(0), 0);
+ Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ // Check if the variable is a static alloca or a byval or inalloca
+ // argument passed in memory. If it is not, then we will ignore this
+ // intrinsic and handle this during isel like dbg.value.
+ int FI = INT_MAX;
+ if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
+ auto SI = FuncInfo->StaticAllocaMap.find(AI);
+ if (SI != FuncInfo->StaticAllocaMap.end())
+ FI = SI->second;
+ } else if (const auto *Arg = dyn_cast<Argument>(Address))
+ FI = FuncInfo->getArgumentFrameIndex(Arg);
+
+ if (FI == INT_MAX)
+ continue;
+
+ DIExpression *Expr = DI->getExpression();
+ if (Offset.getBoolValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
+ Offset.getZExtValue());
+ MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
+ }
+ }
+}
+
/// Propagate swifterror values through the machine function CFG.
static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
auto *TLI = FuncInfo->TLI;
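
processDbgDeclares above strips casts and constant-offset GEPs off the declared address while accumulating the byte offset, then prepends that offset to the variable's DIExpression. A toy illustration of the strip-and-accumulate step (the Node type is invented for this sketch; it is not the IR API):

#include <cassert>
#include <cstdint>

struct Node {
  const Node *Base; // non-null means "Base plus a constant byte Offset"
  int64_t Offset;
};

// Walk through the chain of constant-offset wrappers, summing the offsets,
// and return the underlying base.
static const Node *stripAndAccumulate(const Node *N, int64_t &Offset) {
  while (N->Base) {
    Offset += N->Offset;
    N = N->Base;
  }
  return N;
}

int main() {
  Node Alloca = {nullptr, 0};  // the underlying storage
  Node Gep1 = {&Alloca, 8};    // like (gep Alloca, +8)
  Node Gep2 = {&Gep1, 4};      // like (gep (gep Alloca, +8), +4)
  int64_t Off = 0;
  assert(stripAndAccumulate(&Gep2, Off) == &Alloca && Off == 12);
  // The pass would then record frame-index debug info for Alloca with the
  // accumulated offset (12 here) folded into the expression.
  return 0;
}
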
@@ -1317,6 +1373,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+ processDbgDeclares(FuncInfo);
+
// Iterate over all basic blocks in the function.
for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 23f597db140c..befbd80d7965 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -417,11 +417,10 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
TLI.isZExtFree(SmallVT, Op.getValueType())) {
// We found a type with free casts.
- SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT,
- DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
- Op.getNode()->getOperand(0)),
- DAG.getNode(ISD::TRUNCATE, dl, SmallVT,
- Op.getNode()->getOperand(1)));
+ SDValue X = DAG.getNode(
+ Op.getOpcode(), dl, SmallVT,
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
+ DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
bool NeedZext = DemandedSize > SmallVTBits;
SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
dl, Op.getValueType(), X);
@@ -817,7 +816,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
// are not demanded. This will likely allow the anyext to be folded away.
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerOp = InOp.getNode()->getOperand(0);
+ SDValue InnerOp = InOp.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
unsigned InnerBits = InnerVT.getSizeInBits();
if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index 4837495777da..2638702da152 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -282,8 +282,14 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
if (!Restore)
Restore = &MBB;
- else
+ else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
+ // means the block never returns. If that's the
+ // case, we don't want to call
+ // `findNearestCommonDominator`, which will
+ // return `Restore`.
Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
+ else
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
// Make sure we would be able to insert the restore code before the
// terminator.
@@ -293,7 +299,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
continue;
// One of the terminators needs to happen before the restore point.
if (MBB.succ_empty()) {
- Restore = nullptr;
+ Restore = nullptr; // Abort, we can't find a restore point in this case.
break;
}
// Look for a restore point that post-dominates all the successors.
@@ -419,7 +425,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF,
}
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (MF.empty() || !isShrinkWrapEnabled(MF))
+ if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
return false;
DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index ab578df4069d..e9eff4d0acb2 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -93,8 +93,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
doubleUnderDataTy, // __data
VoidPtrTy, // __personality
VoidPtrTy, // __lsda
- doubleUnderJBufTy, // __jbuf
- nullptr);
+ doubleUnderJBufTy // __jbuf
+ );
return true;
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 34892680aceb..1d232c71d824 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -232,7 +232,11 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
if (!MD)
return nullptr;
- auto *VM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ const MDOperand &Op = MD->getOperand(0);
+ if (!Op.get())
+ return nullptr;
+
+ auto *VM = dyn_cast<ValueAsMetadata>(Op);
if (!VM)
report_fatal_error("MD_associated operand is not ValueAsMetadata");
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 150195f5f85b..e6c5d8753b83 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -487,6 +487,14 @@ void TargetPassConfig::addIRPasses() {
// Insert calls to mcount-like functions.
addPass(createCountingFunctionInserterPass());
+
+ // Add scalarization of target's unsupported masked memory intrinsics pass.
+ // The unsupported intrinsic will be replaced with a chain of basic blocks
+ // that stores/loads elements one by one if the appropriate mask bit is set.
+ addPass(createScalarizeMaskedMemIntrinPass());
+
+ // Expand reduction intrinsics into shuffle sequences if the target wants to.
+ addPass(createExpandReductionsPass());
}
/// Turn exception handling constructs into something the code generators can
@@ -607,6 +615,9 @@ void TargetPassConfig::addMachinePasses() {
addPass(&LocalStackSlotAllocationID, false);
}
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&LiveRangeShrinkID);
+
// Run pre-ra passes.
addPreRegAlloc();
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 75359fe3c0ea..7392c8327148 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -155,7 +155,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
+ AU.addUsedIfAvailable<AAResultsWrapperPass>();
AU.addUsedIfAvailable<LiveVariables>();
AU.addPreserved<LiveVariables>();
AU.addPreserved<SlotIndexes>();
@@ -1627,7 +1627,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
InstrItins = MF->getSubtarget().getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
- AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ if (auto *AAPass = getAnalysisIfAvailable<AAResultsWrapperPass>())
+ AA = &AAPass->getAAResults();
+ else
+ AA = nullptr;
OptLevel = TM.getOptLevel();
bool MadeChange = false;
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index f085132b6a94..407fd9b162e9 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -206,11 +206,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (InputReg != OutputReg) {
MachineRegisterInfo &MRI = F.getRegInfo();
unsigned InputSub = Input.getSubReg();
- if (InputSub == 0) {
- MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg));
+ if (InputSub == 0 &&
+ MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) {
MRI.replaceRegWith(OutputReg, InputReg);
} else {
- // The input register to the PHI has a subregister:
+ // The input register to the PHI has a subregister or it can't be
+ // constrained to the proper register class:
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();