Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 26
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 213
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 134
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp | 15
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 16
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 11
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 619
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 36
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp | 27
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 70
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h | 63
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp | 51
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp | 14
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 8
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp | 2
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp | 17
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp | 24
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 13
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp | 119
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp | 12
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 131
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 9
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 5
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 50
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 109
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 102
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp | 6
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp | 29
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 10
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp | 21
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 4
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 35
57 files changed, 1426 insertions(+), 782 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 87a3cede601b..5984063627b0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -354,8 +354,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -407,8 +406,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// Scan the register defs for this instruction and update
// live-ranges.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -495,8 +493,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "\tKill Group:");
unsigned FirstReg = 0;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -762,11 +759,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, const SUnit *> MISUnitMap;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
- SU));
- }
+ for (const SUnit &SU : SUnits)
+ MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU));
// Track progress along the critical path through the SUnit graph as
// we walk the instructions. This is needed for regclasses that only
@@ -774,12 +768,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
const SUnit *CriticalPathSU = nullptr;
MachineInstr *CriticalPathMI = nullptr;
if (CriticalPathSet.any()) {
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
+ for (const SUnit &SU : SUnits) {
if (!CriticalPathSU ||
- ((SU->getDepth() + SU->Latency) >
+ ((SU.getDepth() + SU.Latency) >
(CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
- CriticalPathSU = SU;
+ CriticalPathSU = &SU;
}
}
assert(CriticalPathSU && "Failed to find SUnit critical path");
@@ -839,8 +832,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// but don't cause any anti-dependence breaking themselves)
if (!MI.isKill()) {
// Attempt to break each anti-dependency...
- for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
- const SDep *Edge = Edges[i];
+ for (const SDep *Edge : Edges) {
SUnit *NextSU = Edge->getSUnit();
if ((Edge->getKind() != SDep::Anti) &&
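[Editor's note] The hunks above all apply the same mechanical refactoring: index-based loops over MI.getNumOperands() or SUnits.size() become range-based for loops over the corresponding range accessors (MI.operands(), the SUnits vector). A minimal self-contained sketch of the pattern, using toy stand-ins rather than LLVM's real types:

#include <cassert>
#include <vector>

struct MachineOperandToy {          // stand-in for llvm::MachineOperand
  bool IsReg = false;
  bool IsDef = false;
  unsigned Reg = 0;
};

struct MachineInstrToy {            // stand-in for llvm::MachineInstr
  std::vector<MachineOperandToy> Ops;
  const std::vector<MachineOperandToy> &operands() const { return Ops; }
};

// Before: manual index loop, as on the removed (-) lines.
unsigned countRegDefsIndexed(const MachineInstrToy &MI) {
  unsigned N = 0;
  for (unsigned i = 0, e = MI.operands().size(); i != e; ++i) {
    const MachineOperandToy &MO = MI.operands()[i];
    if (MO.IsReg && MO.IsDef)
      ++N;
  }
  return N;
}

// After: range-based loop, as on the added (+) lines.
unsigned countRegDefsRange(const MachineInstrToy &MI) {
  unsigned N = 0;
  for (const MachineOperandToy &MO : MI.operands())
    if (MO.IsReg && MO.IsDef)
      ++N;
  return N;
}

int main() {
  MachineInstrToy MI{{{true, true, 1}, {true, false, 2}, {false, false, 0}}};
  assert(countRegDefsIndexed(MI) == countRegDefsRange(MI));
}

Besides brevity, the range form states the element type and const-ness once at the loop header instead of re-deriving the operand on every iteration.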
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cc848d28a9a7..828cb760b82e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -809,9 +809,9 @@ void AsmPrinter::emitFunctionHeader() {
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
- for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
- OutStreamer->emitLabel(DeadBlockSyms[i]);
+ OutStreamer->emitLabel(DeadBlockSym);
}
if (CurrentFnBegin) {
@@ -910,8 +910,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
std::string Str;
raw_string_ostream OS(Str);
OS << "kill:";
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
assert(Op.isReg() && "KILL instruction must have only register operands");
OS << ' ' << (Op.isDef() ? "def " : "killed ")
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
@@ -2150,8 +2149,7 @@ void AsmPrinter::emitJumpTableInfo() {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
- const MachineBasicBlock *MBB = JTBBs[ii];
+ for (const MachineBasicBlock *MBB : JTBBs) {
if (!EmittedSets.insert(MBB).second)
continue;
@@ -2177,8 +2175,8 @@ void AsmPrinter::emitJumpTableInfo() {
MCSymbol* JTISymbol = GetJTISymbol(JTI);
OutStreamer->emitLabel(JTISymbol);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
- emitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ for (const MachineBasicBlock *MBB : JTBBs)
+ emitJumpTableEntry(MJTI, MBB, JTI);
}
if (!JTInDiffSection)
OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index ef1abc47701a..5d0cadefdbf7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -128,191 +128,29 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
emitInlineAsmEnd(STI, &TAP->getSTI());
}
-static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
- // Switch to the inline assembly variant.
- OS << "\t.intel_syntax\n\t";
-
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
- const char *LastEmitted = AsmStr; // One past the last character emitted.
- unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
-
- while (*LastEmitted) {
- switch (*LastEmitted) {
- default: {
- // Not a special case, emit the string section literally.
- const char *LiteralEnd = LastEmitted+1;
- while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
- *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
- ++LiteralEnd;
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd - LastEmitted);
- LastEmitted = LiteralEnd;
- break;
- }
- case '\n':
- ++LastEmitted; // Consume newline character.
- OS << '\n'; // Indent code with newline.
- break;
- case '$': {
- ++LastEmitted; // Consume '$' character.
- bool Done = true;
-
- // Handle escapes.
- switch (*LastEmitted) {
- default: Done = false; break;
- case '$':
- ++LastEmitted; // Consume second '$' character.
- break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
- if (CurVariant != -1)
- report_fatal_error("Nested variants found in inline asm string: '" +
- Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
- break;
- case '|':
- ++LastEmitted; // Consume '|' character.
- if (CurVariant == -1)
- OS << '|'; // This is gcc's behavior for | outside a variant.
- else
- ++CurVariant; // We're in the next variant.
- break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // Consume ')' character.
- if (CurVariant == -1)
- OS << '}'; // This is gcc's behavior for } outside a variant.
- else
- CurVariant = -1;
- break;
- }
- if (Done) break;
-
- bool HasCurlyBraces = false;
- if (*LastEmitted == '{') { // ${variable}
- ++LastEmitted; // Consume '{' character.
- HasCurlyBraces = true;
- }
-
- // If we have ${:foo}, then this is not a real operand reference, it is a
- // "magic" string reference, just like in .td files. Arrange to call
- // PrintSpecial.
- if (HasCurlyBraces && *LastEmitted == ':') {
- ++LastEmitted;
- const char *StrStart = LastEmitted;
- const char *StrEnd = strchr(StrStart, '}');
- if (!StrEnd)
- report_fatal_error("Unterminated ${:foo} operand in inline asm"
- " string: '" + Twine(AsmStr) + "'");
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
- LastEmitted = StrEnd+1;
- break;
- }
-
- const char *IDStart = LastEmitted;
- const char *IDEnd = IDStart;
- while (isDigit(*IDEnd))
- ++IDEnd;
-
- unsigned Val;
- if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
- report_fatal_error("Bad $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
- LastEmitted = IDEnd;
-
- if (Val >= NumOperands - 1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- char Modifier[2] = { 0, 0 };
-
- if (HasCurlyBraces) {
- // If we have curly braces, check for a modifier character. This
- // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
- if (*LastEmitted == ':') {
- ++LastEmitted; // Consume ':' character.
- if (*LastEmitted == 0)
- report_fatal_error("Bad ${:} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- Modifier[0] = *LastEmitted;
- ++LastEmitted; // Consume modifier character.
- }
-
- if (*LastEmitted != '}')
- report_fatal_error("Bad ${} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
- ++LastEmitted; // Consume '}' character.
- }
-
- // Okay, we finally have a value number. Ask the target to print this
- // operand!
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
-
- bool Error = false;
-
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands())
- break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
-
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- // FIXME: Shouldn't arch-independent output template handling go into
- // PrintAsmOperand?
- // Labels are target independent.
- if (MI->getOperand(OpNo).isBlockAddress()) {
- const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
- MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
- Sym->print(OS, AP->MAI);
- MMI->getContext().registerInlineAsmLabel(Sym);
- } else if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
- } else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
- }
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
- }
- }
- break;
- }
- }
+static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
+ bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
+
+ if (InputIsIntelDialect) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
}
- OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
-}
-static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
- if (MAI->getEmitGNUAsmStartIndentationMarker())
+ int AsmPrinterVariant;
+ if (InputIsIntelDialect)
+ AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
+ else
+ AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
+
+ // FIXME: Should this happen for `asm inteldialect` as well?
+ if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
while (*LastEmitted) {
@@ -340,8 +178,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
switch (*LastEmitted) {
default: Done = false; break;
case '$': // $$ -> $
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS << '$';
+ if (!InputIsIntelDialect)
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
++LastEmitted; // Consume second '$' character.
break;
case '(': // $( -> same as GCC's { character.
@@ -480,6 +319,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
}
}
+ if (InputIsIntelDialect)
+ OS << "\n\t.att_syntax";
OS << '\n' << (char)0; // null terminate string.
}
@@ -515,9 +356,8 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
// it.
uint64_t LocCookie = 0;
const MDNode *LocMD = nullptr;
- for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata() &&
- (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ for (const MachineOperand &MO : llvm::reverse(MI->operands())) {
+ if (MO.isMetadata() && (LocMD = MO.getMetadata()) &&
LocMD->getNumOperands() != 0) {
if (const ConstantInt *CI =
mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
@@ -533,10 +373,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
raw_svector_ostream OS(StringData);
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
- else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
+ EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
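[Editor's note] The change above folds the former EmitMSInlineAsmStr and EmitGCCInlineAsmStr into a single EmitInlineAsmStr that keys its dialect-specific behavior (the .intel_syntax/.att_syntax bracketing and the asm-variant index) off the instruction's inline-asm dialect. A compressed sketch of that control flow, with toy types and a hypothetical emitInlineAsmStr signature; the real function additionally walks $-escapes, {a|b} variants, and operand references:

#include <iostream>
#include <string>

enum class AsmDialect { ATT, Intel };   // mirrors InlineAsm::AD_ATT / AD_Intel

std::string emitInlineAsmStr(const std::string &Body, AsmDialect D,
                             int TargetVariant) {
  bool InputIsIntelDialect = D == AsmDialect::Intel;
  std::string OS;
  if (InputIsIntelDialect)
    OS += "\t.intel_syntax\n\t";        // switch to the Intel variant first
  int AsmPrinterVariant = InputIsIntelDialect ? 1 : TargetVariant;
  (void)AsmPrinterVariant; // {a|b} variant selection would consult this
  OS += Body;                           // escape processing elided
  if (InputIsIntelDialect)
    OS += "\n\t.att_syntax";            // and switch back afterwards
  OS += '\n';
  return OS;
}

int main() {
  std::cout << emitInlineAsmStr("nop", AsmDialect::Intel, 0);
}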
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a36d2966d44a..9b73f0ab2f05 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -521,8 +521,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
}
// Construct a DIE for this scope.
-void DwarfCompileUnit::constructScopeDIE(
- LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
+void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
if (!Scope || !Scope->getScopeNode())
return;
@@ -533,46 +533,27 @@ void DwarfCompileUnit::constructScopeDIE(
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
- SmallVector<DIE *, 8> Children;
-
- // We try to create the scope DIE first, then the children DIEs. This will
- // avoid creating un-used children then removing them later when we find out
- // the scope DIE is null.
- DIE *ScopeDIE;
+ // Emit inlined subprograms.
if (Scope->getParent() && isa<DISubprogram>(DS)) {
- ScopeDIE = constructInlinedScopeDIE(Scope);
+ DIE *ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
- // We create children when the scope DIE is not null.
- createScopeChildrenDIE(Scope, Children);
- } else {
- // Early exit when we know the scope DIE is going to be null.
- if (DD->isLexicalScopeDIENull(Scope))
- return;
-
- bool HasNonScopeChildren = false;
- // We create children here when we know the scope DIE is not going to be
- // null and the children will be added to the scope DIE.
- createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
-
- // If there are only other scopes as children, put them directly in the
- // parent instead, as this scope would serve no purpose.
- if (!HasNonScopeChildren) {
- FinalChildren.insert(FinalChildren.end(),
- std::make_move_iterator(Children.begin()),
- std::make_move_iterator(Children.end()));
- return;
- }
- ScopeDIE = constructLexicalScopeDIE(Scope);
- assert(ScopeDIE && "Scope DIE should not be null.");
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
+ return;
}
- // Add children
- for (auto &I : Children)
- ScopeDIE->addChild(std::move(I));
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ // Emit lexical blocks.
+ DIE *ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
- FinalChildren.push_back(std::move(ScopeDIE));
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
}
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
@@ -1022,42 +1003,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
return Result;
}
-DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren) {
- assert(Children.empty());
- DIE *ObjectPointer = nullptr;
-
- // Emit function arguments (order is significant).
- auto Vars = DU->getScopeVariables().lookup(Scope);
- for (auto &DV : Vars.Args)
- Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
-
- // Emit local variables.
- auto Locals = sortLocalVars(Vars.Locals);
- for (DbgVariable *DV : Locals)
- Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
-
- // Skip imported directives in gmlt-like data.
- if (!includeMinimalInlineScopes()) {
- // There is no need to emit empty lexical block DIE.
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
- Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
-
- if (HasNonScopeChildren)
- *HasNonScopeChildren = !Children.empty();
-
- for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
- Children.push_back(constructLabelDIE(*DL, *Scope));
-
- for (LexicalScope *LS : Scope->getChildren())
- constructScopeDIE(LS, Children);
-
- return ObjectPointer;
-}
-
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
@@ -1088,13 +1033,48 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
DIE &ScopeDIE) {
- // We create children when the scope DIE is not null.
- SmallVector<DIE *, 8> Children;
- DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+ DIE *ObjectPointer = nullptr;
+
+ // Emit function arguments (order is significant).
+ auto Vars = DU->getScopeVariables().lookup(Scope);
+ for (auto &DV : Vars.Args)
+ ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
+
+ // Emit local variables.
+ auto Locals = sortLocalVars(Vars.Locals);
+ for (DbgVariable *DV : Locals)
+ ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ // Emit imported entities (skipped in gmlt-like data).
+ if (!includeMinimalInlineScopes()) {
+ for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
+ ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+ }
+
+ // Emit labels.
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
- // Add children
- for (auto &I : Children)
- ScopeDIE.addChild(std::move(I));
+ // Emit inner lexical scopes.
+ auto needToEmitLexicalScope = [this](LexicalScope *LS) {
+ if (isa<DISubprogram>(LS->getScopeNode()))
+ return true;
+ auto Vars = DU->getScopeVariables().lookup(LS);
+ if (!Vars.Args.empty() || !Vars.Locals.empty())
+ return true;
+ if (!includeMinimalInlineScopes() &&
+ !ImportedEntities[LS->getScopeNode()].empty())
+ return true;
+ return false;
+ };
+ for (LexicalScope *LS : Scope->getChildren()) {
+ // If the lexical block doesn't have non-scope children, skip
+ // its emission and put its children directly into the parent scope.
+ if (needToEmitLexicalScope(LS))
+ constructScopeDIE(LS, ScopeDIE);
+ else
+ createAndAddScopeChildren(LS, ScopeDIE);
+ }
return ObjectPointer;
}
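[Editor's note] The DwarfCompileUnit rework replaces the "build children into a vector, maybe discard the scope" scheme with direct recursion: createAndAddScopeChildren attaches children straight to the scope DIE, and the needToEmitLexicalScope lambda decides whether a lexical block DIE is worth emitting or whether its children should be hoisted into the parent. A toy sketch of that recursion (names and types are illustrative, not LLVM's):

#include <iostream>
#include <string>
#include <vector>

struct ScopeToy {
  std::string Name;
  bool HasVars = false;   // stand-in for variables/labels/imported entities
  std::vector<ScopeToy> Children;
};

void addChildren(const ScopeToy &S, int Depth);

// Mirrors constructScopeDIE: emit the block, then recurse into it.
void constructScopeDIE(const ScopeToy &S, int Depth) {
  std::cout << std::string(Depth * 2, ' ') << "DW_TAG_lexical_block "
            << S.Name << '\n';
  addChildren(S, Depth + 1);
}

// Mirrors createAndAddScopeChildren's loop over Scope->getChildren().
void addChildren(const ScopeToy &S, int Depth) {
  for (const ScopeToy &C : S.Children) {
    if (C.HasVars)
      constructScopeDIE(C, Depth);  // scope carries non-scope children
    else
      addChildren(C, Depth);        // empty block: hoist its children
  }
}

int main() {
  ScopeToy Fn{"f", true,
              {ScopeToy{"empty", false, {ScopeToy{"inner", true, {}}}}}};
  addChildren(Fn, 0);  // "inner" attaches directly under "f"
}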
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6e9261087686..fb03982b5e4a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -191,8 +191,7 @@ public:
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
- void constructScopeDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &FinalChildren);
+ void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
/// A helper function to construct a RangeSpanList for a given
/// lexical scope.
@@ -220,11 +219,6 @@ public:
/// Construct a DIE for the given DbgLabel.
DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
- /// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren = nullptr);
-
void createBaseTypeDIEs();
/// Construct a DIE for this subprogram scope.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 150f19324834..39f40b172c1b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -162,9 +162,7 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
bool MarkedNoUnwind = false;
bool SawFunc = false;
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
-
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isGlobal()) continue;
const Function *F = dyn_cast<Function>(MO.getGlobal());
@@ -386,8 +384,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<const LandingPadInfo *, 64> LandingPads;
LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
+ for (const LandingPadInfo &LPI : PadInfos)
+ LandingPads.push_back(&LPI);
// Order landing pads lexicographically by type id.
llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
index 5ac8f49a9522..64dadc82b48b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1013,8 +1013,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// If this is a large problem, avoid visiting the same basic blocks
// multiple times.
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ for (const MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
index 50825ccf9bac..eda0f37fdeb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -513,9 +513,7 @@ bool BranchRelaxation::relaxBranchInstructions() {
// Relaxing branches involves creating new basic blocks, so re-eval
// end() for termination.
- for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
- MachineBasicBlock &MBB = *I;
-
+ for (MachineBasicBlock &MBB : *MF) {
// Empty block?
MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
if (Last == MBB.end())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
index e0e2db9f4725..bbdd8aab502e 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp
@@ -58,8 +58,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMIRAddFSDiscriminatorsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
initializeMIRNamerPass(Registry);
+ initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index a1ff02178ffa..3bed81d5841d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -90,7 +90,7 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, ValueTrackingVariableLocations)
+CGOPT_EXP(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
@@ -534,12 +534,17 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
+ if (auto Opt = getExplicitValueTrackingVariableLocations())
+ Options.ValueTrackingVariableLocations = *Opt;
+ else
+ Options.ValueTrackingVariableLocations =
+ getDefaultValueTrackingVariableLocations(TheTriple);
+
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
@@ -692,3 +697,9 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
+
+bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) {
+ if (T.getArch() == llvm::Triple::x86_64)
+ return true;
+ return false;
+}
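[Editor's note] Switching ValueTrackingVariableLocations from CGOPT to CGOPT_EXP adds an accessor that reports whether the flag was set explicitly on the command line; InitTargetOptionsFromCodeGenFlags then falls back to a triple-derived default. A minimal sketch of that "explicit value or computed default" pattern using std::optional (toy function names, not the real CommandFlags API):

#include <optional>

// Default mirrors getDefaultValueTrackingVariableLocations above:
// instruction referencing is on by default only on x86-64.
static bool defaultValueTrackingForTriple(bool IsX86_64) {
  return IsX86_64;
}

bool resolveValueTracking(std::optional<bool> Explicit, bool IsX86_64) {
  if (Explicit)
    return *Explicit;                          // honor the explicit flag
  return defaultValueTrackingForTriple(IsX86_64);
}

int main() {
  bool A = resolveValueTracking(std::nullopt, /*IsX86_64=*/true);  // true
  bool B = resolveValueTracking(false, /*IsX86_64=*/true);         // false
  return (A && !B) ? 0 : 1;
}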
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 981f5973fee8..4e98d49206b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -370,9 +370,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
// Handle cases in which this instruction defines NewReg.
MachineInstr *MI = RefOper->getParent();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &CheckOper = MI->getOperand(i);
-
+ for (const MachineOperand &CheckOper : MI->operands()) {
if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
return true;
@@ -462,11 +460,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Find the node at the bottom of the critical path.
const SUnit *Max = nullptr;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap[SU->getInstr()] = SU;
- if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
- Max = SU;
+ for (const SUnit &SU : SUnits) {
+ MISUnitMap[SU.getInstr()] = &SU;
+ if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency)
+ Max = &SU;
}
assert(Max && "Failed to find bottom of the critical path");
@@ -621,8 +618,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// is invalid. If the instruction defines other registers,
// save a list of them so that we don't pick a new register
// that overlaps any of them.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index c6c0b79cd7e7..0bb186a02416 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -76,8 +76,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -87,7 +86,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
} else {
if (MO.isDead()) {
#ifndef NDEBUG
- // Sanity check on uses of this dead register. All of them should be
+ // Basic check on uses of this dead register. All of them should be
// 'undef'.
for (auto &U : MRI->use_nodbg_operands(Reg))
assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
@@ -152,8 +151,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -171,8 +169,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3a52959d54bf..755b3b844570 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
@@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
NewPhi.addDef(DstReg);
- for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
- auto &MO = MI.getOperand(SrcIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg()) {
NewPhi.addMBB(MO.getMBB());
continue;
@@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
unsigned NumElts = DstTy.getNumElements();
SmallBitVector ExtractedElts(NumElts);
- for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
- MRI.use_instr_nodbg_end())) {
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
@@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase(
MatchInfo(Builder);
}
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+ if (mi_match(
+ Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(LShrAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHL;
+
+ // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+ } else if (mi_match(Dst, MRI,
+ m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+ m_GShl(m_Reg(ShlSrc),
+ m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(ShlAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ if (ShlAmt != LShrAmt)
+ return false;
+
+ LLT AmtTy = MRI.getType(ShlAmt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ };
+ return true;
+}
+
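[Editor's note] The new matchOrShiftToFunnelShift combine relies on the identity that an OR of complementary shifts of two values is a funnel shift. A small standalone check of that identity for the G_FSHL case (plain C++, assuming a 32-bit width; the pattern requires a non-zero amount, since x << 32 is undefined in C++):

#include <cassert>
#include <cstdint>

// fshl concatenates X:Y, shifts left by Amt, and keeps the high word.
uint32_t fshl32(uint32_t X, uint32_t Y, unsigned Amt) {
  Amt %= 32;
  if (Amt == 0)
    return X;
  return (X << Amt) | (Y >> (32 - Amt));
}

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678;
  for (unsigned Amt = 1; Amt < 32; ++Amt) {
    // The matched pattern: (or (shl x, amt), (lshr y, sub(bw, amt))).
    uint32_t Or = (X << Amt) | (Y >> (32 - Amt));
    assert(Or == fshl32(X, Y, Amt));
  }
}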
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
- // Check for a constant 2 or a splat of 2 on the RHS.
- auto RHS = MI.getOperand(3).getReg();
- bool IsVector = MRI.getType(RHS).isVector();
- if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
- return false;
- if (IsVector) {
- // FIXME: There's no mi_match pattern for this yet.
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
- if (!RHSDef)
- return false;
- auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
- if (!Splat || *Splat != 2)
- return false;
- }
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
@@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
+ FMA = LHS;
+ Z = RHS->getOperand(0).getReg();
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
+ Z = LHS->getOperand(0).getReg();
+ FMA = RHS;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
+ LHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+ // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
+ RHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ bool FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally) &&
+ hasMoreUses(*LHS, *RHS, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
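The fpext folds are not purely algebraic: widening the multiply into the fused op removes the narrow intermediate rounding, which is exactly why the fold must be sanctioned by isContractableFMul (contract/fast-math). A small standalone illustration of the rounding difference, not taken from the patch:

    #include <cmath>
    #include <cstdio>
    int main() {
      float x = 16777215.0f, y = 3.0f; // the product needs 26 bits; float keeps 24
      double narrow = (double)(x * y);                     // rounds in float first
      double fused = std::fma((double)x, (double)y, -0.0); // rounds once, in double
      std::printf("%.1f vs %.1f\n", narrow, fused);        // 50331644.0 vs 50331645.0
    }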
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
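In the first pattern here the negation is hoisted outside the fused op, using -(x*y) - z == -(x*y + z) == -fma(x, y, z); that is why the builder makes a fresh virtual register for the FMA and negates it afterwards. A one-line algebraic check, not part of the patch:

    #include <cmath>
    #include <cstdio>
    int main() {
      double x = 2.0, y = 3.0, z = 5.0;
      // (fsub (fneg (fmul x, y)), z) -> (fneg (fma x, y, z))
      std::printf("%g == %g\n", -(x * y) - z, -std::fma(x, y, z));
    }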
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c74bec7dfc0d..e09cd26eb0c1 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
// FIXME: What does the original arg index mean here?
SmallVector<CallLowering::ArgInfo, 3> Args;
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ Args.push_back({MO.getReg(), OpType, 0});
return createLibcall(MIRBuilder, Libcall,
{MI.getOperand(0).getReg(), OpType, 0}, Args);
}
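Several hunks in this import replace manual index loops that start at operand 1 with llvm::drop_begin, which adapts any range to skip its first N elements (one by default). A minimal usage sketch, assuming LLVM's ADT headers are on the include path:

    #include "llvm/ADT/STLExtras.h"
    #include <cstdio>
    #include <vector>
    int main() {
      std::vector<int> Ops{100, 1, 2, 3};   // pretend Ops[0] is the def, the rest uses
      for (int Use : llvm::drop_begin(Ops)) // skips Ops[0], like the rewritten loops
        std::printf("%d ", Use);            // prints: 1 2 3
      // llvm::drop_begin(Ops, 2) would skip the first two entries instead.
    }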
@@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
// Decompose the original operands if they don't evenly divide.
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ Register SrcReg = MO.getReg();
if (GCD == SrcSize) {
Unmerges.push_back(SrcReg);
} else {
@@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
// Break into a common type
SmallVector<Register, 16> Parts;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ extractGCDType(Parts, GCDTy, MO.getReg());
// Build the requested new merge, padding with undef.
LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
@@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
// of that value loaded. This can result in a sequence of loads and stores
// mixed types, depending on what the target specifies as good types to use.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
unsigned Size = KnownLen;
for (auto CopyTy : MemOps) {
// Issuing an unaligned load / store pair that overlaps with the previous
@@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
Register LoadPtr = Src;
Register Offset;
if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ LLT SrcTy = MRI.getType(Src);
+ Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
.getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
// Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+ }
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
Size -= CopyTy.getSizeInBytes();
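This hunk replaces the single cached PtrTy (derived from Src) with a type looked up from each pointer, so the store-side G_PTR_ADD is built with Dst's own type. That matters whenever the two pointers differ, e.g. in width or address space. A small sketch of the distinction using LLT (assuming LLVM 14-era headers; the address spaces below are made up):

    #include "llvm/Support/LowLevelTypeImpl.h"
    #include <cstdio>
    using llvm::LLT;
    int main() {
      LLT SrcTy = LLT::pointer(/*AddressSpace=*/1, /*SizeInBits=*/64);
      LLT DstTy = LLT::pointer(/*AddressSpace=*/0, /*SizeInBits=*/32);
      // Building the store-side ptr_add with SrcTy would hand the destination
      // pointer the wrong width and address space.
      std::printf("src: p%u, %u bits; dst: p%u, %u bits\n",
                  SrcTy.getAddressSpace(), SrcTy.getScalarSizeInBits(),
                  DstTy.getAddressSpace(), DstTy.getScalarSizeInBits());
    }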
@@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Apart from that, this loop is pretty much doing the same thing as the
// memcpy codegen function.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
SmallVector<Register, 16> LoadVals;
for (auto CopyTy : MemOps) {
// Construct MMO for the load.
@@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Create the load.
Register LoadPtr = Src;
if (CurrOffset != 0) {
+ LLT SrcTy = MRI.getType(Src);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
CurrOffset += CopyTy.getSizeInBytes();
@@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
Register StorePtr = Dst;
if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
}
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 1a2102e3ef21..650500c7eb31 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
Register Reg = MI.getOperand(OpIdx).getReg();
const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
- // Sanity check that the target properly implemented getRegBankFromRegClass.
+ // Check that the target properly implemented getRegBankFromRegClass.
assert(RegBank.covers(*RC) &&
"The mapping of the register bank does not make sense");
return &RegBank;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1a440c064a59..b0b84763e922 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_BUILD_VECTOR: {
// TODO: Probably should have a recursion depth guard since you could have
// bitcasted vector elements.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB))
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB))
return false;
- }
return true;
}
@@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// Only handle constants since we would need to know if number of leading
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ auto Const = getIConstantVRegVal(MO.getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
return SplatValAndReg;
}
-bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue, bool AllowUndef) {
- if (auto SplatValAndReg =
- getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+} // end anonymous namespace
+
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
return false;
}
-} // end anonymous namespace
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
index 6c1ce4c1efb0..bbd9006a5d8c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -399,8 +399,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// having a single global, but is aggressive enough for any other case.
if (GlobalMergeIgnoreSingleUse) {
BitVector AllGlobals(Globals.size());
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (UGS.Globals.count() > 1)
@@ -418,8 +417,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
BitVector PickedGlobals(Globals.size());
bool Changed = false;
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (PickedGlobals.anyCommon(UGS.Globals))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index e4606daba352..2d38a44d5a33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -260,10 +260,12 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
if (DTU) {
// If there were multiple indirectbr's, they may have common successors,
// but in the dominator tree, we only track unique edges.
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end());
- Updates.reserve(Updates.size() + UniqueSuccessors.size());
- for (BasicBlock *BB : UniqueSuccessors)
- Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessors;
+ Updates.reserve(Updates.size() + BBs.size());
+ for (BasicBlock *BB : BBs) {
+ if (UniqueSuccessors.insert(BB).second)
+ Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ }
DTU->applyUpdates(Updates);
}
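The rewritten loop still deduplicates, but does so while walking BBs in its original order: SmallPtrSet iteration order is not deterministic, so building the set first and then iterating it could emit the updates in a different order from run to run. The pattern in isolation, with standard containers and illustrative names:

    #include <cstdio>
    #include <string>
    #include <unordered_set>
    #include <vector>
    int main() {
      std::vector<std::string> BBs{"a", "b", "a", "c", "b"};
      std::unordered_set<std::string> Seen;
      for (const auto &BB : BBs)
        if (Seen.insert(BB).second)  // true only on the first visit
          std::printf("insert edge -> %s\n", BB.c_str()); // a, b, c, input order
    }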
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
index 64e1f4351456..fc5ac45752ca 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -274,11 +274,9 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
}
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
LIS.getInterval(MO.getReg());
- }
}
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
@@ -583,11 +581,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!ParentVNI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
MO.setIsUndef();
- }
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index c3e0553418a5..fab6b8d10a33 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -73,11 +73,9 @@ void LatencyPriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
++NumNodesBlocking;
- }
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
Queue.push_back(SU);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index a4eb3094612b..cf62b0e5d7e8 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -544,8 +544,7 @@ public:
// Re-state the variable location: if there's no replacement then NewLoc
// is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
// identifying the alternative location will be emitted.
- const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
- DbgValueProperties Properties(Expr, false);
+ const DbgValueProperties &Properties = ActiveVLocIt->second.Properties;
PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
// Update machine locations <=> variable locations maps. Defer updating
@@ -836,6 +835,15 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
unsigned Base = Spill.SpillBase;
MIB.addReg(Base);
MIB.addImm(0);
+
+ // Being on the stack makes this location indirect; if it was _already_
+ // indirect though, we need to add extra indirection. See this test for
+ // a scenario where this happens:
+ // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll
+ if (Properties.Indirect) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
} else {
// This is a stack location with a weird subregister offset: emit an undef
// DBG_VALUE instead.
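DIExpression::append is the stock way to tack extra DWARF operations onto an existing expression; here it appends the DW_OP_deref that models the extra level of indirection the spill introduces. A minimal standalone use (assuming the usual LLVM IR libraries are linked; dump() needs an assertions-enabled build):

    #include "llvm/BinaryFormat/Dwarf.h"
    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cstdint>
    #include <vector>
    int main() {
      llvm::LLVMContext Ctx;
      llvm::DIExpression *Empty = llvm::DIExpression::get(Ctx, {});
      std::vector<uint64_t> Ops = {llvm::dwarf::DW_OP_deref};
      llvm::DIExpression *Deref = llvm::DIExpression::append(Empty, Ops);
      Deref->dump(); // !DIExpression(DW_OP_deref)
    }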
@@ -1288,6 +1296,24 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
+ // We always ignore SP defines on call instructions; they don't actually
+ // change the value of the stack pointer... except for win32's _chkstk. This
+ // is rare: filter quickly for the common case (no stack adjustments, not a
+ // call, etc.). If it is a call that modifies SP, recognize the SP register
+ // defs.
+ bool CallChangesSP = false;
+ if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() &&
+ !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data()))
+ CallChangesSP = true;
+
+ // Test whether we should ignore a def of this register due to it being part
+ // of the stack pointer.
+ auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool {
+ if (CallChangesSP)
+ return false;
+ return MI.isCall() && MTracker->SPAliases.count(R);
+ };
+
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -1298,7 +1324,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
+ !IgnoreSPAlias(MO.getReg())) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -1347,6 +1373,9 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
continue;
Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
+
for (auto *MO : RegMaskPtrs)
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
@@ -1628,9 +1657,10 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
/// fragments of that DILocalVariable which overlap. This reduces work during
/// the data-flow stage from "Find any overlapping fragments" to "Check if the
/// known-to-overlap fragments are present".
-/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// \param MI A previously unprocessed debug instruction to analyze for
/// fragment usage.
void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
@@ -1732,7 +1762,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
for (auto &MI : MBB) {
process(MI);
// Also accumulate fragment map.
- if (MI.isDebugValue())
+ if (MI.isDebugValue() || MI.isDebugRef())
accumulateFragmentMap(MI);
// Create a map from the instruction number (if present) to the
@@ -2322,15 +2352,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
bool InstrRefBasedLDV::vlocJoin(
MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn) {
- // To emulate VarLocBasedImpl, process this block if it's not in scope but
- // _does_ assign a variable value. No live-ins for this scope are transferred
- // in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
- return false;
-
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -2466,11 +2489,10 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// "blocks that are potentially in scope. See comment at start of vlocJoin.
SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
- // Old LiveDebugValues tracks variable locations that come out of blocks
- // not in scope, where DBG_VALUEs occur. This is something we could
- // legitimately ignore, but lets allow it for now.
- if (EmulateOldLDV)
- BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+ // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
// We also need to propagate variable values through any artificial blocks
// that immediately follow blocks in scope.
@@ -2635,7 +2657,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// Join values from predecessors. Updates LiveInIdx, and writes output
// into JoinedInLocs.
bool InLocsChanged =
- vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+ vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn);
SmallVector<const MachineBasicBlock *, 8> Preds;
for (const auto *Pred : MBB->predecessors())
@@ -2730,6 +2752,8 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
continue;
if (BlockLiveIn->Kind == DbgValue::VPHI)
BlockLiveIn->Kind = DbgValue::Def;
+ assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() ==
+ Var.getFragment() && "Fragment info missing during value prop");
Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
} // Per-variable loop.
@@ -2879,6 +2903,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MFI = &MF.getFrameInfo();
LS.initialize(MF);
+ const auto &STI = MF.getSubtarget();
+ AdjustsStackInCalls = MFI->adjustsStack() &&
+ STI.getFrameLowering()->stackProbeFunctionModifiesSP();
+ if (AdjustsStackInCalls)
+ StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF);
+
MTracker =
new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
VTracker = nullptr;
@@ -2895,7 +2925,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
++MaxNumBlocks;
MLocTransfer.resize(MaxNumBlocks);
- vlocs.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
initialSetup(MF);
@@ -3040,6 +3070,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
BBNumToRPO.clear();
DebugInstrNumToInstr.clear();
DebugPHINumToValue.clear();
+ OverlapFragments.clear();
+ SeenFragments.clear();
return Changed;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index d96ef6d4f6e5..789205e61cdb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -655,6 +655,14 @@ public:
const DbgValueProperties &Properties);
};
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
/// Collection of DBG_VALUEs observed when traversing a block. Records each
/// variable and the value the DBG_VALUE refers to. Requires the machine value
/// location dataflow algorithm to have run already, so that values can be
@@ -672,9 +680,12 @@ public:
MapVector<DebugVariable, DbgValue> Vars;
DenseMap<DebugVariable, const DILocation *> Scopes;
MachineBasicBlock *MBB = nullptr;
+ const OverlapMap &OverlappingFragments;
+ DbgValueProperties EmptyProperties;
public:
- VLocTracker() {}
+ VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
+ : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {}
void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
Optional<ValueIDNum> ID) {
@@ -689,6 +700,8 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
void defVar(const MachineInstr &MI, const MachineOperand &MO) {
@@ -704,16 +717,37 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
-};
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+ void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) {
+ auto Overlaps = OverlappingFragments.find(
+ {Var.getVariable(), Var.getFragmentOrDefault()});
+ if (Overlaps == OverlappingFragments.end())
+ return;
+
+ // Otherwise: terminate any overlapped variable locations.
+ for (auto FragmentInfo : Overlaps->second) {
+ // The "empty" fragment is stored as DebugVariable::DefaultFragment, so
+ // that it overlaps with everything; however, its canonical representation
+ // in a DebugVariable is as "None".
+ Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
+ if (DebugVariable::isDefaultFragment(FragmentInfo))
+ OptFragmentInfo = None;
+
+ DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
+ Var.getInlinedAt());
+ DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Overlapped, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Overlapped] = Loc;
+ }
+ }
+};
// XXX XXX docs
class InstrRefBasedLDV : public LDVImpl {
@@ -817,6 +851,16 @@ private:
OverlapMap OverlapFragments;
VarToFragments SeenFragments;
+ /// True if we need to examine call instructions for stack clobbers. We
+ /// normally assume that they don't clobber SP, but stack probes on Windows
+ /// do.
+ bool AdjustsStackInCalls = false;
+
+ /// If AdjustsStackInCalls is true, this holds the name of the target's stack
+ /// probe function, which is the function we expect will alter the stack
+ /// pointer.
+ StringRef StackProbeSymbolName;
+
/// Tests whether this instruction is a spill to a stack slot.
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
@@ -962,7 +1006,6 @@ private:
/// \returns true if any live-ins change value, either from value propagation
/// or PHI elimination.
bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
index dcd546f9c6db..5f976bf43c5b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -1875,34 +1875,57 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
- // Re-insert any debug instrs back in the position they were. Ordering
- // is preserved by vector. We must re-insert in the same order to ensure that
- // debug instructions don't swap, which could re-order assignments.
- for (auto &P : StashedDebugInstrs) {
- SlotIndex Idx = P.Idx;
+ // Re-insert any debug instrs back in the position they were. We must
+ // re-insert in the same order to ensure that debug instructions don't swap,
+ // which could re-order assignments. Do so in a batch -- once we find the
+ // insert position, insert all instructions at the same SlotIdx. They are
+ // guaranteed to appear in-sequence in StashedDebugInstrs because we insert
+ // them in order.
+ for (auto StashIt = StashedDebugInstrs.begin();
+ StashIt != StashedDebugInstrs.end(); ++StashIt) {
+ SlotIndex Idx = StashIt->Idx;
+ MachineBasicBlock *MBB = StashIt->MBB;
+ MachineInstr *MI = StashIt->MI;
+
+ auto EmitInstsHere = [this, &StashIt, MBB, Idx,
+ MI](MachineBasicBlock::iterator InsertPos) {
+ // Insert this debug instruction.
+ MBB->insert(InsertPos, MI);
+
+ // Look at subsequent stashed debug instructions: if they're at the same
+ // index, insert those too.
+ auto NextItem = std::next(StashIt);
+ while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) {
+ assert(NextItem->MBB == MBB && "Instrs with same slot index should be "
+ "in the same block");
+ MBB->insert(InsertPos, NextItem->MI);
+ StashIt = NextItem;
+ NextItem = std::next(StashIt);
+ }
+ };
// Start block index: find the first non-debug instr in the block, and
// insert before it.
- if (Idx == Slots->getMBBStartIdx(P.MBB)) {
+ if (Idx == Slots->getMBBStartIdx(MBB)) {
MachineBasicBlock::iterator InsertPos =
- findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap);
- P.MBB->insert(InsertPos, P.MI);
+ findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap);
+ EmitInstsHere(InsertPos);
continue;
}
if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) {
// Insert at the end of any debug instructions.
auto PostDebug = std::next(Pos->getIterator());
- PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end());
- P.MBB->insert(PostDebug, P.MI);
+ PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end());
+ EmitInstsHere(PostDebug);
} else {
// Insert position disappeared; walk forwards through slots until we
// find a new one.
- SlotIndex End = Slots->getMBBEndIdx(P.MBB);
+ SlotIndex End = Slots->getMBBEndIdx(MBB);
for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) {
Pos = Slots->getInstructionFromIndex(Idx);
if (Pos) {
- P.MBB->insert(Pos->getIterator(), P.MI);
+ EmitInstsHere(Pos->getIterator());
break;
}
}
@@ -1911,8 +1934,8 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
// insert! It's not safe to discard any debug instructions; place them
// in front of the first terminator, or in front of end().
if (Idx >= End) {
- auto TermIt = P.MBB->getFirstTerminator();
- P.MBB->insert(TermIt, P.MI);
+ auto TermIt = MBB->getFirstTerminator();
+ EmitInstsHere(TermIt);
}
}
}
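The EmitInstsHere lambda advances the outer iterator past every stashed instruction that shares the current SlotIndex, so the potentially expensive search for an insert position runs once per index rather than once per instruction. The control flow in isolation, on toy data:

    #include <cstdio>
    #include <iterator>
    #include <utility>
    #include <vector>
    int main() {
      // (slot index, instruction) pairs, already in insertion order
      std::vector<std::pair<int, char>> Stashed{{1, 'a'}, {1, 'b'}, {3, 'c'}};
      for (auto It = Stashed.begin(); It != Stashed.end(); ++It) {
        std::printf("find insert position for index %d\n", It->first);
        std::printf("  insert %c\n", It->second);
        auto Next = std::next(It);
        while (Next != Stashed.end() && Next->first == It->first) {
          std::printf("  insert %c (same index, position reused)\n", Next->second);
          It = Next; // consume the batched entry in the outer loop too
          Next = std::next(It);
        }
      }
    }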
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index d91ff734ad8f..6380c4bfd6e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -108,8 +108,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
+ for (const MachineOperand &MO : OrigMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -425,15 +424,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
// them results in incorrect code.
- bool BeingSpilled = false;
- for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
- if (VReg == RegsBeingSpilled[i]) {
- BeingSpilled = true;
- break;
- }
- }
-
- if (BeingSpilled) continue;
+ if (llvm::is_contained(RegsBeingSpilled, VReg))
+ continue;
// LI may have been separated, create new intervals.
LI->RenumberValues();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
index dace05f1ad95..ada5c5be484a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -18,7 +18,7 @@
namespace llvm {
/// Helper function that distributes live range value numbers and the
-/// corresponding segments of a master live range \p LR to a list of newly
+/// corresponding segments of a primary live range \p LR to a list of newly
/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p
/// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific
/// live range in the \p SplitLRs array.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
index 51ba4b7e53eb..e8744797707b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp
@@ -58,9 +58,9 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *
LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
- for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- if (Kills[i]->getParent() == MBB)
- return Kills[i];
+ for (MachineInstr *MI : Kills)
+ if (MI->getParent() == MBB)
+ return MI;
return nullptr;
}
@@ -811,8 +811,8 @@ bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
SmallPtrSet<const MachineBasicBlock *, 8> Kills;
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- Kills.insert(VI.Kills[i]->getParent());
+ for (MachineInstr *MI : VI.Kills)
+ Kills.insert(MI->getParent());
// Loop over all of the successors of the basic block, checking to see if
// the value is either live in the block, or if it is killed in the block.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 2e99c8595cbd..ee2387d1e8e6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -316,14 +316,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// than that, but the increased register pressure makes that a
// tricky thing to balance. Investigate if re-materializing these
// becomes an issue.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI.operands()) {
// Consider replacing all frame index operands that reference
// an object allocated in the local block.
- if (MI.getOperand(i).isFI()) {
+ if (MO.isFI()) {
// Don't try this with values not in the local block.
- if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ if (!MFI.isObjectPreAllocated(MO.getIndex()))
break;
- int Idx = MI.getOperand(i).getIndex();
+ int Idx = MO.getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 90ecc6fc68fc..b742ad9823c9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -314,6 +314,8 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
}
bool Changed = MIRSampleLoader->runOnFunction(MF);
+ if (Changed)
+ MBFI->calculate(MF, *MBFI->getMBPI(), getAnalysis<MachineLoopInfo>());
if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
index 366d06871245..310c2721c3bd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1170,9 +1170,10 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
void MachineFunction::finalizeDebugInstrRefs() {
auto *TII = getSubtarget().getInstrInfo();
- auto MakeDbgValue = [&](MachineInstr &MI) {
+ auto MakeUndefDbgValue = [&](MachineInstr &MI) {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
+ MI.getOperand(0).setReg(0);
MI.getOperand(1).ChangeToRegister(0, false);
};
@@ -1187,15 +1188,15 @@ void MachineFunction::finalizeDebugInstrRefs() {
Register Reg = MI.getOperand(0).getReg();
// Some vregs can be deleted as redundant in the meantime. Mark those
- // as DBG_VALUE $noreg.
- if (Reg == 0) {
- MakeDbgValue(MI);
+ // as DBG_VALUE $noreg. Additionally, some normal instructions are
+ // quickly deleted, leaving dangling references to vregs with no def.
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
+ MakeUndefDbgValue(MI);
continue;
}
assert(Reg.isVirtual());
MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
- assert(RegInfo->hasOneDef(Reg));
// If we've found a copy-like instruction, follow it back to the
// instruction that defines the source value, see salvageCopySSA docs
@@ -1327,9 +1328,9 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
assert(Old != New && "Not making a change?");
bool MadeChange = false;
MachineJumpTableEntry &JTE = JumpTables[Idx];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
+ for (MachineBasicBlock *&MBB : JTE.MBBs)
+ if (MBB == Old) {
+ MBB = New;
MadeChange = true;
}
return MadeChange;
@@ -1342,8 +1343,8 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
OS << printJumpTableEntryReference(i) << ':';
- for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
- OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+ for (const MachineBasicBlock *MBB : JumpTables[i].MBBs)
+ OS << ' ' << printMBBReference(*MBB);
if (i != e)
OS << '\n';
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
index 5c4f75e9ceb9..aaa80432d2f2 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1490,12 +1490,10 @@ bool MachineInstr::allDefsAreDead() const {
/// instruction to this instruction.
void MachineInstr::copyImplicitOps(MachineFunction &MF,
const MachineInstr &MI) {
- for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands()))
if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
addOperand(MF, MO);
- }
}
bool MachineInstr::hasComplexRegisterTies() const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
index 4d080e1a4f82..680dbe54ffaf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1071,7 +1071,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
- assert(MMO->getSize() == getSize() && "Size mismatch!");
+ assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) ||
+ MMO->getSize() == getSize()) &&
+ "Size mismatch!");
if (MMO->getBaseAlign() >= getBaseAlign()) {
// Update the alignment value.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
index cfbccebaff3e..7783b5e0d3cc 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Include target features from an arbitrary candidate for the outlined
- // function. This makes sure the outlined function knows what kinds of
- // instructions are going into it. This is fine, since all parent functions
- // must necessarily support the instructions that are in the outlined region.
Candidate &FirstCand = OF.Candidates.front();
- const Function &ParentFn = FirstCand.getMF()->getFunction();
- if (ParentFn.hasFnAttribute("target-features"))
- F->addFnAttr(ParentFn.getFnAttribute("target-features"));
+ const TargetInstrInfo &TII =
+ *FirstCand.getMF()->getSubtarget().getInstrInfo();
- // Set nounwind, so we don't generate eh_frame.
- if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }))
- F->addFnAttr(Attribute::NoUnwind);
+ TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
index e18318386def..8d6459a627fa 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1455,17 +1455,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
int asap = 0;
int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[I];
- for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
- EP = SU->Preds.end();
- IP != EP; ++IP) {
- SUnit *pred = IP->getSUnit();
- if (IP->getLatency() == 0)
+ for (const SDep &P : SU->Preds) {
+ SUnit *pred = P.getSUnit();
+ if (P.getLatency() == 0)
zeroLatencyDepth =
std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
- if (ignoreDependence(*IP, true))
+ if (ignoreDependence(P, true))
continue;
- asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
- getDistance(pred, SU, *IP) * MII));
+ asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
+ getDistance(pred, SU, P) * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[I].ASAP = asap;
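Loop style aside, the recurrence is unchanged: ASAP(v) = max over predecessors p of ASAP(p) + latency(p, v) - distance(p, v) * MII, where distance counts how many loop iterations the dependence crosses. A worked toy example with made-up numbers:

    #include <algorithm>
    #include <cstdio>
    int main() {
      struct Pred { int ASAP, Latency, Distance; };
      const int MII = 3;
      Pred Preds[] = {{0, 2, 0},  // same-iteration edge: 0 + 2 - 0*3 = 2
                      {4, 1, 1}}; // loop-carried edge:   4 + 1 - 1*3 = 2
      int asap = 0;
      for (const Pred &P : Preds)
        asap = std::max(asap, P.ASAP + P.Latency - P.Distance * MII);
      std::printf("ASAP = %d\n", asap); // 2
    }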
@@ -1521,9 +1519,8 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Preds,
const NodeSet *S = nullptr) {
Preds.clear();
- for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
- I != E; ++I) {
- for (const SDep &Pred : (*I)->Preds) {
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Pred : SU->Preds) {
if (S && S->count(Pred.getSUnit()) == 0)
continue;
if (ignoreDependence(Pred, true))
@@ -1532,7 +1529,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.insert(Pred.getSUnit());
}
// Back-edges are predecessors with an anti-dependence.
- for (const SDep &Succ : (*I)->Succs) {
+ for (const SDep &Succ : SU->Succs) {
if (Succ.getKind() != SDep::Anti)
continue;
if (S && S->count(Succ.getSUnit()) == 0)
@@ -2546,8 +2543,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
unsigned Pos = 0;
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
++I, ++Pos) {
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
index 30745c7a5583..54c478645dcf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp
@@ -596,8 +596,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
@@ -789,8 +788,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
// If this instruction is inside a loop and sinking this instruction can
// shorten the live ranges of more registers, it is still profitable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
// Ignore non-register operands.
if (!MO.isReg())
continue;
@@ -889,8 +887,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = nullptr;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue; // Ignore non-register operands.
Register Reg = MO.getReg();
@@ -1322,8 +1319,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isUse())
continue;
Register Reg = MO.getReg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
index d6bb3e7c9e58..32078db76cf3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1276,11 +1276,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (DstTy.getNumElements() != MI->getNumOperands() - 1)
report("G_BUILD_VECTOR must have an operand for each elemement", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
- }
break;
}
@@ -1292,12 +1290,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isVector() || SrcEltTy.isVector())
report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
MI);
- }
if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
"dest elt type",
@@ -1316,11 +1312,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (MI->getNumOperands() < 3)
report("G_CONCAT_VECTOR requires at least 2 source operands", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
- }
if (DstTy.getNumElements() !=
SrcTy.getNumElements() * (MI->getNumOperands() - 1))
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
@@ -3063,9 +3057,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
// Predecessor of landing pad live-out on last call.
if (MFI->isEHPad()) {
- for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) {
- if (I->isCall()) {
- PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex();
+ for (const MachineInstr &MI : llvm::reverse(*Pred)) {
+ if (MI.isCall()) {
+ PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex();
break;
}
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
index 8b3cdfab4d42..aaa6403cc978 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -73,8 +73,7 @@ void ModuloScheduleExpander::expand() {
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
@@ -1006,8 +1005,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
unsigned CurStageNum,
unsigned InstrStageNum,
ValueMapTy *VRMap) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
+ for (MachineOperand &MO : NewMI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
Register reg = MO.getReg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 9a4f70a6070f..29a88480fd9f 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -527,9 +527,9 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
for (MachineBasicBlock *MBB : Visited) {
- MCPhysReg Reg = CSI[i].getReg();
+ MCPhysReg Reg = I.getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
@@ -540,17 +540,16 @@ static void updateLiveness(MachineFunction &MF) {
// each MBB between the prologue and epilogue so that it is not clobbered
// before it is reloaded in the epilogue. The Visited set contains all
// blocks outside of the region delimited by prologue/epilogue.
- if (CSI[i].isSpilledToReg()) {
+ if (I.isSpilledToReg()) {
for (MachineBasicBlock &MBB : MF) {
if (Visited.count(&MBB))
continue;
- MCPhysReg DstReg = CSI[i].getDstReg();
+ MCPhysReg DstReg = I.getDstReg();
if (!MBB.isLiveIn(DstReg))
MBB.addLiveIn(DstReg);
}
}
}
-
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -902,9 +901,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
- RegInfo->useFPForScavengingIndex(MF) &&
- !RegInfo->hasStackRealignment(MF));
+ bool EarlyScavengingSlots =
+ TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
index 68920e2e50df..6653145d3d2a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1258,8 +1258,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (unsigned I = MI.getNumOperands(); I-- > 0;) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1362,8 +1361,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
// subreg defs don't free the full register. We left the subreg number
@@ -1440,8 +1438,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
++BundledMI;
while (BundledMI->isBundledWithPred()) {
- for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
- MachineOperand &MO = BundledMI->getOperand(I);
+ for (MachineOperand &MO : BundledMI->operands()) {
if (!MO.isReg())
continue;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 5a93b58e0baf..50411c177007 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -199,7 +199,8 @@ class RAGreedy : public MachineFunctionPass,
struct RegInfo {
LiveRangeStage Stage = RS_New;
- // Cascade - Eviction loop prevention. See canEvictInterference().
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
unsigned Cascade = 0;
RegInfo() = default;
@@ -207,13 +208,51 @@ class RAGreedy : public MachineFunctionPass,
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+ LiveRangeStage getStage(Register Reg) const {
+ return ExtraRegInfo[Reg].Stage;
+ }
+
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return ExtraRegInfo[VirtReg.reg()].Stage;
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[Reg].Stage = Stage;
}
void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register if present; otherwise,
+ /// initialize it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ ExtraRegInfo.grow(Reg);
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[VirtReg.reg()].Stage = Stage;
+ ExtraRegInfo[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
}
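Cascade numbers are handed out lazily and only ever increase; an interval is allowed to evict only interference with a strictly smaller cascade, which is what breaks A-evicts-B, B-evicts-A loops. A toy sketch of the lazy assignment (not RAGreedy's real data structures):

    #include <cstdio>
    #include <map>
    static unsigned NextCascade = 1;
    static std::map<unsigned, unsigned> Cascades; // virtual reg -> cascade number
    unsigned getOrAssignNewCascade(unsigned VReg) {
      unsigned &C = Cascades[VReg]; // default-constructed to 0 == "none yet"
      if (!C)
        C = NextCascade++;
      return C;
    }
    int main() {
      unsigned A = getOrAssignNewCascade(10); // first evictor gets cascade 1
      unsigned B = getOrAssignNewCascade(20); // a later evictor gets cascade 2,
      std::printf("A=%u B=%u\n", A, B);       // so it may not re-evict A's range
    }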
template<typename Iterator>
@@ -410,8 +449,11 @@ private:
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
- bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
- const SmallVirtRegSet &) const;
+ bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool canEvictHintInterference(LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
@@ -683,15 +725,16 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
- ExtraRegInfo.grow(Reg);
- if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = RS_Assign;
-
- if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ auto Stage = getOrInitStage(Reg);
+ if (Stage == RS_New) {
+ Stage = RS_Assign;
+ setStage(Reg, Stage);
+ }
+ if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
Prio = Size;
- } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+ } else if (Stage == RS_Memory) {
// Memory operands should be considered last.
// Change the priority such that memory operands are assigned in
// the reverse order that they came in.
@@ -706,7 +749,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
- if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
+ if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
@@ -780,10 +823,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
if (Order.isHint(Hint)) {
MCRegister PhysHint = Hint.asMCReg();
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
- EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
- FixedRegisters)) {
+
+ if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) {
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
@@ -864,8 +905,19 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
return false;
}
-/// canEvictInterference - Return true if all interferences between VirtReg and
-/// PhysReg can be evicted.
+/// canEvictHintInterference - return true if the interference for VirtReg
+/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool RAGreedy::canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
@@ -873,7 +925,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(
+bool RAGreedy::canEvictInterferenceBasedOnCost(
LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
@@ -1054,9 +1106,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
- if (!Cascade)
- Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;
+ unsigned Cascade = getOrAssignNewCascade(VirtReg.reg());
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1082,10 +1132,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
+ assert((getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg()].Cascade = Cascade;
+ setCascade(Intf->reg(), Cascade);
++NumEvicted;
NewVRegs.push_back(Intf->reg());
}
@@ -1150,8 +1200,8 @@ MCRegister RAGreedy::tryFindEvictionCandidate(
continue;
}
- if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
- FixedRegisters))
+ if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
continue;
// Best so far.
@@ -1756,7 +1806,6 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
SE->finish(&IntvMap);
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
// Sort out the new intervals created by splitting. We get four kinds:
@@ -1765,10 +1814,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
+ const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
- if (getStage(Reg) != RS_New)
+ if (getOrInitStage(Reg.reg()) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
@@ -2012,13 +2061,11 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Tell LiveDebugVariables about the new ranges.
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &LI = LIS->getInterval(LREdit.get(I));
- if (getStage(LI) == RS_New && IntvMap[I] == 0)
+ const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
setStage(LI, RS_Spill);
}
@@ -2104,8 +2151,6 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Assign all new registers to RS_Spill. This was the last chance.
setStage(LREdit.begin(), LREdit.end(), RS_Spill);
return 0;
@@ -2400,7 +2445,6 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
-
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
// leave the new intervals as RS_New so they can compete.
@@ -3021,7 +3065,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
LiveRangeStage Stage = getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');
+ << getCascade(VirtReg.reg()) << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3311,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
ExtraRegInfo.clear();
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c847068bca90..4c8534cf2d01 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3908,20 +3908,20 @@ void RegisterCoalescer::lateLiveIntervalUpdate() {
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
- if (!CurrList[i])
+ for (MachineInstr *&MI : CurrList) {
+ if (!MI)
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.count(CurrList[i])) {
- CurrList[i] = nullptr;
+ if (ErasedInstrs.count(MI)) {
+ MI = nullptr;
continue;
}
bool Again = false;
- bool Success = joinCopy(CurrList[i], Again);
+ bool Success = joinCopy(MI, Again);
Progress |= Success;
if (Success || !Again)
- CurrList[i] = nullptr;
+ MI = nullptr;
}
return Progress;
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3f013eb6024e..0e8e8338b46d 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -406,11 +406,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// register in later operands. The lanes of other defs will now be live
// after this instruction, so these should not be treated as killed by the
// instruction even though they appear to be killed in this one operand.
- for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &OtherMO = MI->getOperand(I);
+ for (const MachineOperand &OtherMO :
+ llvm::drop_begin(MI->operands(), OperIdx + 1))
if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
KillLaneMask &= ~getLaneMaskForMO(OtherMO);
- }
}
// Clear undef flag, we'll re-add it later once we know which subregister
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ce400ea43f29..df5a041b87cd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4436,7 +4436,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode()) {
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
// If the equivalent Div node also exists, update its users.
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
@@ -4464,6 +4464,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because an undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4521,6 +4524,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because an undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4779,6 +4785,106 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
return SDValue();
}
+// Determine whether the Min/Max pair of SDNodes (potentially swapped around)
+// makes a signed saturate pattern, clamping to between -2^(BW-1) and
+// 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp in
+// BW. Works with both SMIN/SMAX nodes and the setcc/select combination. The
+// operands are the same as SimplifySelectCC: N0<N1 ? N2 : N3.
+static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // The compare and select operand should be the same or the select operands
+ // should be truncated versions of the comparison.
+ if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
+ return 0;
+ // The constants need to be the same or a truncated version of each other.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return 0;
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C2 = N3C->getAPIntValue();
+ if (C1.getBitWidth() < C2.getBitWidth() ||
+ C1 != C2.sextOrSelf(C1.getBitWidth()))
+ return 0;
+ return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
+ };
+
+ // Check the initial value is a SMIN/SMAX equivalent.
+ unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
+ if (!Opcode0)
+ return SDValue();
+
+ SDValue N00, N01, N02, N03;
+ ISD::CondCode N0CC;
+ switch (N0.getOpcode()) {
+ case ISD::SMIN:
+ case ISD::SMAX:
+ N00 = N02 = N0.getOperand(0);
+ N01 = N03 = N0.getOperand(1);
+ N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
+ break;
+ case ISD::SELECT_CC:
+ N00 = N0.getOperand(0);
+ N01 = N0.getOperand(1);
+ N02 = N0.getOperand(2);
+ N03 = N0.getOperand(3);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (N0.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+ N00 = N0.getOperand(0).getOperand(0);
+ N01 = N0.getOperand(0).getOperand(1);
+ N02 = N0.getOperand(1);
+ N03 = N0.getOperand(2);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
+ break;
+ default:
+ return SDValue();
+ }
+
+ unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
+ if (!Opcode1 || Opcode0 == Opcode1)
+ return SDValue();
+
+ ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
+ ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
+ if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
+ return SDValue();
+
+ const APInt &MinC = MinCOp->getAPIntValue();
+ const APInt &MaxC = MaxCOp->getAPIntValue();
+ APInt MinCPlus1 = MinC + 1;
+ if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
+ return SDValue();
+ BW = MinCPlus1.exactLogBase2() + 1;
+ return N02;
+}
+
+static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ unsigned BW;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
+ return SDValue();
+ EVT FPVT = Fp.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
+ ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ return SDValue();
+ SDLoc DL(Fp);
+ SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+}
+
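A worked scalar illustration of the clamp this matcher recognizes: smin(smax(x, -2^(BW-1)), 2^(BW-1)-1) (or the swapped form) is exactly a signed saturate to BW bits, and BW is recovered from the two constants the same way isSaturatingMinMax does. A minimal standalone sketch, with int64_t standing in for APInt:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // MinC is the SMIN (upper-bound) constant and MaxC the SMAX (lower-bound)
    // constant, as in isSaturatingMinMax: MinC+1 must equal -MaxC and be a
    // power of two, and then BW = log2(MinC + 1) + 1.
    int satBitWidth(int64_t MinC, int64_t MaxC) {
      int64_t P = MinC + 1;                        // MinCPlus1
      if (P <= 0 || -MaxC != P || (P & (P - 1)) != 0)
        return 0;                                  // not a saturate pattern
      int BW = 1;                                  // exactLogBase2(P) + 1
      while (P > 1) {
        P >>= 1;
        ++BW;
      }
      return BW;
    }

    int64_t clamp(int64_t X, int64_t Lo, int64_t Hi) {
      return std::min(std::max(X, Lo), Hi);
    }

    int main() {
      assert(satBitWidth(127, -128) == 8);       // saturate to i8
      assert(satBitWidth(32767, -32768) == 16);  // saturate to i16
      // The clamp of an fptosi result is what FP_TO_SINT_SAT computes.
      assert(clamp(300, -128, 127) == 127);
      assert(clamp(-999, -128, 127) == -128);
    }
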
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4817,6 +4923,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
+ if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
+ if (SDValue S = PerformMinMaxFpToSatCombine(
+ N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
+ return S;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -9940,9 +10051,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a masked store with an all ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MST->isUnindexed() && !MST->isCompressingStore() &&
- !MST->isTruncatingStore())
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
+ !MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
@@ -9997,9 +10107,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// If this is a masked load with an all ones mask, we can use an unmasked load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MLD->isUnindexed() && !MLD->isExpandingLoad() &&
- MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
+ !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
return CombineTo(N, NewLd, NewLd.getValue(1));
@@ -10138,6 +10247,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return FMinMax;
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
@@ -15007,7 +15119,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -23034,6 +23146,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
+
return SDValue();
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c1bb65409282..331e0325aea3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -765,7 +765,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
assert(!SD->isVariadic());
SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+  DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
@@ -775,6 +775,13 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DbgOperand.getKind() == SDDbgOperand::CONST)
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ // Immediately fold any indirectness from the LLVM-IR intrinsic into the
+ // expression:
+ if (SD->isIndirect()) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
+
// It may not be immediately possible to identify the MachineInstr that
// defines a VReg; it can depend, for example, on the order blocks are
// emitted in. When this happens, or when further analysis is needed later,
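
A standalone model of the indirection folding above: an indirect dbg.value means "the variable lives at the address in this location", which DWARF spells by appending DW_OP_deref to the location expression, the same thing DIExpression::append does in the hunk. The sketch below uses a plain vector of opcodes rather than LLVM's metadata classes, and skips the canonicalization the real append performs:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // DWARF opcode values from the DWARF 5 specification.
    constexpr uint64_t DW_OP_deref = 0x06;
    constexpr uint64_t DW_OP_plus_uconst = 0x23;

    using ExprModel = std::vector<uint64_t>;

    // Stand-in for DIExpression::append(Expr, {DW_OP_deref}).
    ExprModel appendDeref(ExprModel Expr) {
      Expr.push_back(DW_OP_deref);
      return Expr;
    }

    int main() {
      // An expression that adds 8 to the location; an indirect dbg.value
      // folds its indirection in as a trailing deref.
      ExprModel Expr = {DW_OP_plus_uconst, 8};
      ExprModel Folded = appendDeref(Expr);
      assert((Folded == ExprModel{DW_OP_plus_uconst, 8, DW_OP_deref}));
    }
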
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index eb9d2286aeb4..08598eeded7a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3553,9 +3553,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Node.
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- if (Tmp2.getOpcode() == ISD::SETCC) {
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
- Tmp1, Tmp2.getOperand(2),
+ if (Tmp2.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ Tmp2.getOperand(0).getValueType())) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2),
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 1f73c9eea104..98312f91d8c0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
static cl::opt<bool>
EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
-/// Do extensive, expensive, sanity checking.
+/// Do extensive, expensive, basic correctness checking.
void DAGTypeLegalizer::PerformExpensiveChecks() {
// If a node is not processed, then none of its values should be mapped by any
// of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
@@ -534,7 +534,8 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
// in theory momentarily not be the case while ReplaceValueWith is doing
- // its stuff. Mark the original node NewNode to help sanity checking.
+ // its stuff. Mark the original node NewNode to help basic correctness
+ // checking.
N->setNodeId(NewNode);
if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
// It morphed into a previously analyzed node - nothing more to do.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 539c9cb9c256..7ec2638b1e71 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Generate hi masked load.
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector())
@@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
LoMemVT.getStoreSize().getFixedSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
- MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
SDValue Lo, Hi, Res;
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MPI = N->getPointerInfo().getWithOffset(
LoMemVT.getStoreSize().getFixedSize());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
- N->getRanges());
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 55fe26eb64cd..2695ed36991c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -268,8 +268,8 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
// Now check that there are no other dependencies
// on instructions already in the packet.
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- for (const SDep &Succ : Packet[i]->Succs) {
+ for (const SUnit *S : Packet)
+ for (const SDep &Succ : S->Succs) {
// Since we do not add pseudos to packets, might as well
// ignore order deps.
if (Succ.isCtrl())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 95f7e43b151d..84e6d2a16422 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -706,8 +706,8 @@ void ScheduleDAGSDNodes::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
+ for (const SUnit *SU : Sequence) {
+ if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 008665d50233..c282e03387dd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -406,8 +406,8 @@ bool ISD::isVPOpcode(unsigned Opcode) {
switch (Opcode) {
default:
return false;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \
+ case ISD::VPSD: \
return true;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -416,23 +416,25 @@ bool ISD::isVPOpcode(unsigned Opcode) {
bool ISD::isVPBinaryOp(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_BINARYOP return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
bool ISD::isVPReduction(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
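
The rewritten isVPBinaryOp/isVPReduction rely on the .def X-macro technique: the switch redefines a few macros before including VPIntrinsics.def, so every registered node expands to a case label and nodes carrying the queried property expand to a return. A minimal standalone illustration of the same pattern, with a hypothetical VP_OPCODES macro playing the role of the included .def file:

    #include <cstdio>

    enum Opcode { VP_ADD, VP_MUL, VP_LOAD, OTHER };

    // Stand-in for VPIntrinsics.def: each registered node expands whatever
    // BEGIN_VP / VP_BINARY / END_VP mean at the point of use.
    #define VP_OPCODES                                                      \
      BEGIN_VP(VP_ADD) VP_BINARY END_VP(VP_ADD)                             \
      BEGIN_VP(VP_MUL) VP_BINARY END_VP(VP_MUL)                             \
      BEGIN_VP(VP_LOAD) END_VP(VP_LOAD)

    bool isVPBinaryOp(Opcode Op) {
      switch (Op) {
      default:
        break;
      // Every registered op contributes a case label; ops carrying the
      // binary property return true, the rest break out of their case.
    #define BEGIN_VP(OPC) case OPC:
    #define VP_BINARY return true;
    #define END_VP(OPC) break;
        VP_OPCODES
    #undef BEGIN_VP
    #undef VP_BINARY
    #undef END_VP
      }
      return false;
    }

    int main() {
      std::printf("%d %d\n", isVPBinaryOp(VP_ADD), isVPBinaryOp(VP_LOAD));
      // prints: 1 0
    }
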
/// The operand position of the vector mask.
@@ -440,8 +442,8 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::VPSD: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -452,8 +454,8 @@ Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::VPSD: \
return EVLPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -974,7 +976,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
}
#ifndef NDEBUG
-/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
static void VerifySDNode(SDNode *N) {
switch (N->getOpcode()) {
default:
@@ -4540,10 +4542,25 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
}
// FIXME: unify with llvm::haveNoCommonBitsSet.
-// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
+ // Match masked merge pattern (X & ~M) op (Y & M)
+ if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
+ auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
+ if (isBitwiseNot(NotM, true)) {
+ SDValue NotOperand = NotM->getOperand(0);
+ return NotOperand == And->getOperand(0) ||
+ NotOperand == And->getOperand(1);
+ }
+ return false;
+ };
+ if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(B->getOperand(0), A) ||
+ MatchNoCommonBitsPattern(B->getOperand(1), A))
+ return true;
+ }
return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
computeKnownBits(B));
}
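
The new pattern match encodes a bitwise fact: (X & ~M) keeps only bits where M is 0 and (Y & M) keeps only bits where M is 1, so the two values can never share a set bit regardless of X and Y, which is what haveNoCommonBitsSet needs to prove. A quick standalone check of that property on concrete values:

    #include <cassert>
    #include <cstdint>

    int main() {
      // For any X, Y, M: the two sides of a masked merge are bitwise
      // disjoint, so or, add and xor of them are interchangeable.
      uint32_t X = 0xDEADBEEF, Y = 0x12345678, M = 0x0F0F0F0F;
      uint32_t A = X & ~M, B = Y & M;
      assert((A & B) == 0);
      assert((A | B) == A + B && (A | B) == (A ^ B));
    }
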
@@ -5070,7 +5087,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
break;
case ISD::BITCAST:
- // Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5d911c165293..7726a0007e44 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
+ MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- MemoryLocation ML;
- if (VT.isScalableVector())
- ML = MemoryLocation::getAfter(PtrOperand);
- else
- ML = MemoryLocation(PtrOperand, LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
@@ -5807,8 +5797,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I,
- DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
+ setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
@@ -6942,10 +6931,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
- SDValue N =
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
setValue(&I, N);
return;
}
@@ -6957,7 +6945,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -6974,7 +6961,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
- DL, NodeTys, Ops);
+ sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -6988,7 +6975,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -7009,7 +6995,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
- TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -7047,7 +7033,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
- Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
@@ -7068,8 +7054,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
- GA->getGlobal(), getCurSDLoc(),
- Val.getValueType(), GA->getOffset())});
+ GA->getGlobal(), sdl, Val.getValueType(),
+ GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
@@ -7077,13 +7063,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
});
for (auto &T : Targets) {
- Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
- SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
- getCurSDLoc(), MVT::Other, Ops),
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
+ MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
@@ -7102,7 +7088,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
- DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
@@ -7114,46 +7100,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
- auto DL = getCurSDLoc();
+ EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
- SDValue TripCount = getValue(I.getOperand(1));
- Type *ElementTy = I.getOperand(0)->getType();
- EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned VecWidth = VT.getVectorNumElements();
+ EVT ElementVT = Index.getValueType();
- SmallVector<SDValue, 16> OpsTripCount;
- SmallVector<SDValue, 16> OpsIndex;
- SmallVector<SDValue, 16> OpsStepConstants;
- for (unsigned i = 0; i < VecWidth; i++) {
- OpsTripCount.push_back(TripCount);
- OpsIndex.push_back(Index);
- OpsStepConstants.push_back(
- DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
+ if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
}
- EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
+ SDValue TripCount = getValue(I.getOperand(1));
+ auto VecTy = CCVT.changeVectorElementType(ElementVT);
- auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
- SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
- SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
+ SDValue VectorIndex, VectorTripCount;
+ if (VecTy.isScalableVector()) {
+ VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
+ } else {
+ VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
+ }
+ SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
- ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
- SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
+ ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0),
VectorTripCount, ISD::CondCode::SETULT);
- setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
- DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
+ setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT,
+ DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
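
The rewritten expansion computes the mask as splat(Index) + <0,1,2,...> compared unsigned-less-than against splat(TripCount), with UADDO's overflow bit masking out lanes whose index computation wrapped. A scalar model of that lane-wise computation, assuming a hypothetical 4-lane mask:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // Scalar model of the expanded get.active.lane.mask: lane I is active
    // iff Index + I < TripCount, computed as splat(Index) plus a step
    // vector with unsigned-overflow tracking (the UADDO in the expansion).
    std::array<bool, 4> activeLaneMask(uint32_t Index, uint32_t TripCount) {
      std::array<bool, 4> Mask{};
      for (uint32_t I = 0; I < 4; ++I) {
        uint32_t Sum = Index + I;
        bool Overflow = Sum < Index;            // UADDO carry bit
        Mask[I] = !Overflow && Sum < TripCount; // AND(NOT(ovf), SETULT)
      }
      return Mask;
    }

    int main() {
      auto M = activeLaneMask(6, 8); // lanes 6,7 active; 8,9 inactive
      for (bool B : M)
        std::printf("%d", B);
      std::printf("\n"); // prints: 1100
    }
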
case Intrinsic::experimental_vector_insert: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
@@ -7163,16 +7145,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::experimental_vector_extract: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -7182,9 +7162,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
- setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+ setValue(&I,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
@@ -7314,9 +7295,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
-#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
-#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
-#define END_REGISTER_VP_INTRINSIC(...) break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e4a69adff05b..737695b5eabe 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
@@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
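
These bitcast hunks extend the demanded-bits tracking to big-endian layouts: when one wide element is bitcast from Scale narrow elements, demanded sub-chunk i of the wide value lives in source element i on little-endian but in element Scale-1-i on big-endian, which is the new EltOffset computation. A tiny standalone demonstration of the mapping:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Wide = 0x11223344; // e.g. one i32 viewed as four i8s
      const unsigned Scale = 4;
      for (unsigned I = 0; I != Scale; ++I) {
        uint8_t Chunk = (Wide >> (8 * I)) & 0xFF; // sub-chunk I of the value
        unsigned LE = I, BE = Scale - 1 - I;      // source element holding it
        std::printf("chunk %u (0x%02x): LE elt %u, BE elt %u\n", I, Chunk, LE,
                    BE);
      }
    }
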
@@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
// TODO - bigendian once we have test coverage.
- if ((NumSrcEltBits % NumDstEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
- if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
- DAG.getDataLayout().isLittleEndian() &&
+ if (IsLE && DemandedElts == 1 &&
+ DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
@@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getValueType().isScalableVector())
return false;
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
@@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ bool IsROTL = (Op.getOpcode() == ISD::ROTL);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+ unsigned RevAmt = BitWidth - Amt;
+
+ // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+ // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+ APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // rot*(x, 0) --> x
+ if (Amt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // See if we don't demand either half of the rotated bits.
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+ DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+ }
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+ DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ }
+ }
+
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
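
The new constant-amount handling uses the decomposition rotl(x, Amt) = (x << Amt) | (x >> (BW - Amt)): if no demanded bit can come from the shifted-in half, the rotate degenerates to a single shift. A scalar check of the SHL case with a 32-bit value:

    #include <cassert>
    #include <cstdint>

    uint32_t rotl(uint32_t X, unsigned Amt) {
      Amt %= 32;
      return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;
    }

    int main() {
      uint32_t X = 0xCAFEF00D;
      unsigned Amt = 8;
      // rotl(X, 8) = (X << 8) | (X >> 24). If the demanded bits have at
      // least Amt trailing zeros, the (X >> 24) half contributes nothing,
      // so the rotate simplifies to a plain SHL (and dually to SRL).
      uint32_t DemandedMask = 0xFFFFFF00; // countTrailingZeros() == 8 >= Amt
      assert((rotl(X, Amt) & DemandedMask) == ((X << Amt) & DemandedMask));
    }
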
@@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
@@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
- } else if ((NumSrcEltBits % BitWidth) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
+ // TODO - bigendian once we have test coverage.
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
// Helper for demanding the specified elements and all the bits of both binary
// operands.
@@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
- if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
@@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
- if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
- DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ DemandedSrcElts == 1) {
// aext - if we just need the bottom element then we can bitcast.
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
}
@@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// zext - if we just need the bottom element then we can mask:
// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
- if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
- Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
+ Op->isOnlyUserOf(Src.getNode()) &&
Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
SDLoc DL(Op);
EVT SrcVT = Src.getValueType();
@@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
+ case ISD::ADD: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1, /*AssumeSingleUse*/ true))
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ }
case ISD::OR:
case ISD::XOR:
- case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
@@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
@@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
index 9aea5a7a8853..f49ba5ccd447 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -159,8 +159,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
// FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int FI = MO.getIndex();
@@ -394,8 +393,7 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
SmallVectorImpl<int> &SlotMapping,
MachineFunction &MF) {
// Update the operands.
- for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int OldFI = MO.getIndex();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
index 943bd18c6c8b..54fc6ee45d00 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
-static cl::opt<unsigned> TailDupJmpTableLoopSize(
- "tail-dup-jmptable-loop-size",
- cl::desc("Maximum loop latches to consider tail duplication that are "
- "successors of loop header."),
- cl::init(128), cl::Hidden);
-
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
- // When doing tail-duplication with jumptable loops like:
- // 1 -> 2 <-> 3 |
- // \ <-> 4 |
- // \ <-> 5 |
- // \ <-> ... |
- // \---> rest |
-  // a quadratic number of edges and many more loops are added to the CFG. This
-  // may cause a compile-time regression when the jumptable is quite large.
- // So set the limit on jumptable cases.
- auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
- const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
- TailBB.pred_end());
-  // Check that the basic block has a large number of successors, all of
-  // which have only one successor, which is the basic block itself.
- return llvm::count_if(
- TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
- return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
- }) > TailDupJmpTableLoopSize;
- };
-
- if (isLargeJumpTableLoop(TailBB))
- return false;
-
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index b0594ec086b2..fbf190a52585 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -136,6 +136,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
return 0;
}
+bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+ if (!hasFP(MF))
+ return false;
+
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ return RegInfo->useFPForScavengingIndex(MF) &&
+ !RegInfo->hasStackRealignment(MF);
+}
+
bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
!F.hasFnAttribute(Attribute::NoRecurse))
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e74b3195a130..5119dac36713 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -957,8 +957,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// If any of the registers accessed are non-constant, conservatively assume
// the instruction is not rematerializable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0)
@@ -1401,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment(
}
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+void TargetInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ outliner::Candidate &FirstCand = Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+
+ // Set nounwind, so we don't generate eh_frame.
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F.addFnAttr(Attribute::NoUnwind);
+}
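
A sketch of the intended call-site shape: once the outliner has created the new Function, it lets the target merge candidate attributes into it. The wrapper below is illustrative; only mergeOutliningCandidateAttributes and the outliner::Candidate type come from the patch:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineOutliner.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/CodeGen/TargetSubtargetInfo.h"
    #include "llvm/IR/Function.h"
    #include <vector>

    using namespace llvm;

    static void stampOutlinedFunction(
        Function &F, std::vector<outliner::Candidate> &Candidates) {
      const MachineFunction *AnyMF = Candidates.front().getMF();
      const TargetInstrInfo &TII = *AnyMF->getSubtarget().getInstrInfo();
      // Copies "target-features" from one parent and adds nounwind only if
      // every candidate's parent is nounwind, per the default above.
      TII.mergeOutliningCandidateAttributes(F, Candidates);
    }
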
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1d3bb286c882..d1c2cdeb133b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1082,7 +1082,7 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
@@ -2135,7 +2135,7 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 46cec5407565..dfd962be2882 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -373,19 +373,25 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
return false;
}
-/// Given a register, if it has a single in-basic-block use, return the use
-/// instruction if it's a copy or a two-address use.
+/// Given a register, if all its uses are in the same basic block, return the
+/// last use instruction if it's a copy or a two-address use.
static MachineInstr *
findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
- bool &IsCopy, Register &DstReg, bool &IsDstPhys) {
- if (!MRI->hasOneNonDBGUse(Reg))
- // None or more than one use.
- return nullptr;
- MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
- MachineInstr &UseMI = *UseOp.getParent();
- if (UseMI.getParent() != MBB)
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys,
+ LiveIntervals *LIS) {
+ MachineOperand *UseOp = nullptr;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ return nullptr;
+ if (isPlainlyKilled(MI, Reg, LIS))
+ UseOp = &MO;
+ }
+ if (!UseOp)
return nullptr;
+ MachineInstr &UseMI = *UseOp->getParent();
+
Register SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
@@ -399,7 +405,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ unsigned Src2 = UseMI.getOperandNo(UseOp);
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
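
The net semantic change in findOnlyInterestingUse: instead of requiring exactly one non-debug use, it now accepts any number of uses as long as all of them sit in MBB, and it returns the use that kills the register. A condensed restatement of just that selection loop (KilledBy stands in for the pass's isPlainlyKilled(MI, Reg, LIS) helper):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/Register.h"
    #include <functional>

    using namespace llvm;

    static MachineOperand *
    findKillingUseInBlock(Register Reg, MachineBasicBlock *MBB,
                          MachineRegisterInfo *MRI,
                          const std::function<bool(MachineInstr *)> &KilledBy) {
      MachineOperand *UseOp = nullptr;
      for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
        MachineInstr *MI = MO.getParent();
        if (MI->getParent() != MBB)
          return nullptr;    // Any out-of-block use disqualifies Reg.
        if (KilledBy(MI))
          UseOp = &MO;       // Remember the use ending Reg's live range.
      }
      return UseOp;          // Null if no killing use was found in MBB.
    }
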
@@ -492,8 +498,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
return;
}
- for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask()) {
removeMapRegEntry(MO, SrcRegMap, TRI);
continue;
@@ -685,7 +690,6 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
- // Sanity check.
assert(mi->getNumExplicitDefs() == 1);
assert(NewMI->getNumExplicitDefs() == 1);
@@ -724,7 +728,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
Register NewReg;
Register Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII, IsCopy,
- NewReg, IsDstPhys)) {
+ NewReg, IsDstPhys, LIS)) {
if (IsCopy && !Processed.insert(UseMI).second)
break;
@@ -1336,8 +1340,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {