Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
57 files changed, 1426 insertions, 782 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 87a3cede601b..5984063627b0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -354,8 +354,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -407,8 +406,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction( // Scan the register defs for this instruction and update // live-ranges. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -495,8 +493,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, LLVM_DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; @@ -762,11 +759,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // ...need a map from MI to SUnit. std::map<MachineInstr *, const SUnit *> MISUnitMap; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; - MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), - SU)); - } + for (const SUnit &SU : SUnits) + MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU)); // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only @@ -774,12 +768,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( const SUnit *CriticalPathSU = nullptr; MachineInstr *CriticalPathMI = nullptr; if (CriticalPathSet.any()) { - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; + for (const SUnit &SU : SUnits) { if (!CriticalPathSU || - ((SU->getDepth() + SU->Latency) > + ((SU.getDepth() + SU.Latency) > (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { - CriticalPathSU = SU; + CriticalPathSU = &SU; } } assert(CriticalPathSU && "Failed to find SUnit critical path"); @@ -839,8 +832,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // but don't cause any anti-dependence breaking themselves) if (!MI.isKill()) { // Attempt to break each anti-dependency... 
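The dominant change in this file, repeated across most of the files below, is replacing manual index loops with range-based for loops over the same operand or SUnit sequence. A minimal self-contained sketch of the two forms, using stand-in Operand/Instr types rather than LLVM's real MachineOperand/MachineInstr:

    #include <vector>

    struct Operand { bool IsDef = false; };
    struct Instr {
      std::vector<Operand> Ops;
      unsigned getNumOperands() const { return static_cast<unsigned>(Ops.size()); }
      Operand &getOperand(unsigned I) { return Ops[I]; }
      std::vector<Operand> &operands() { return Ops; }
    };

    unsigned countDefs(Instr &MI) {
      // Old form, as removed: index-based access through getOperand(i).
      unsigned N = 0;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i)
        if (MI.getOperand(i).IsDef)
          ++N;
      // New form, as introduced: range-based iteration; the element can also
      // be taken by const reference when the loop body never mutates it.
      unsigned M = 0;
      for (const Operand &MO : MI.operands())
        if (MO.IsDef)
          ++M;
      return M; // both loops visit exactly the same operands, so M == N
    }

Besides readability, the range form drops the manually maintained bound and the repeated getOperand() call, and it lets const propagate to the element type.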
- for (unsigned i = 0, e = Edges.size(); i != e; ++i) { - const SDep *Edge = Edges[i]; + for (const SDep *Edge : Edges) { SUnit *NextSU = Edge->getSUnit(); if ((Edge->getKind() != SDep::Anti) && diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index cc848d28a9a7..828cb760b82e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -809,9 +809,9 @@ void AsmPrinter::emitFunctionHeader() { // so that we don't get references to undefined symbols. std::vector<MCSymbol*> DeadBlockSyms; MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); - for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + for (MCSymbol *DeadBlockSym : DeadBlockSyms) { OutStreamer->AddComment("Address taken block that was later removed"); - OutStreamer->emitLabel(DeadBlockSyms[i]); + OutStreamer->emitLabel(DeadBlockSym); } if (CurrentFnBegin) { @@ -910,8 +910,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { std::string Str; raw_string_ostream OS(Str); OS << "kill:"; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &Op = MI->getOperand(i); + for (const MachineOperand &Op : MI->operands()) { assert(Op.isReg() && "KILL instruction must have only register operands"); OS << ' ' << (Op.isDef() ? "def " : "killed ") << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); @@ -2150,8 +2149,7 @@ void AsmPrinter::emitJumpTableInfo() { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { - const MachineBasicBlock *MBB = JTBBs[ii]; + for (const MachineBasicBlock *MBB : JTBBs) { if (!EmittedSets.insert(MBB).second) continue; @@ -2177,8 +2175,8 @@ void AsmPrinter::emitJumpTableInfo() { MCSymbol* JTISymbol = GetJTISymbol(JTI); OutStreamer->emitLabel(JTISymbol); - for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) - emitJumpTableEntry(MJTI, JTBBs[ii], JTI); + for (const MachineBasicBlock *MBB : JTBBs) + emitJumpTableEntry(MJTI, MBB, JTI); } if (!JTInDiffSection) OutStreamer->emitDataRegion(MCDR_DataRegionEnd); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ef1abc47701a..5d0cadefdbf7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -128,191 +128,29 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, emitInlineAsmEnd(STI, &TAP->getSTI()); } -static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, const MCAsmInfo *MAI, - AsmPrinter *AP, uint64_t LocCookie, - raw_ostream &OS) { - // Switch to the inline assembly variant. - OS << "\t.intel_syntax\n\t"; - - int CurVariant = -1; // The number of the {.|.|.} region we are in. - const char *LastEmitted = AsmStr; // One past the last character emitted. - unsigned NumOperands = MI->getNumOperands(); - int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel. - - while (*LastEmitted) { - switch (*LastEmitted) { - default: { - // Not a special case, emit the string section literally. 
- const char *LiteralEnd = LastEmitted+1; - while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && - *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') - ++LiteralEnd; - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - OS.write(LastEmitted, LiteralEnd - LastEmitted); - LastEmitted = LiteralEnd; - break; - } - case '\n': - ++LastEmitted; // Consume newline character. - OS << '\n'; // Indent code with newline. - break; - case '$': { - ++LastEmitted; // Consume '$' character. - bool Done = true; - - // Handle escapes. - switch (*LastEmitted) { - default: Done = false; break; - case '$': - ++LastEmitted; // Consume second '$' character. - break; - case '(': // $( -> same as GCC's { character. - ++LastEmitted; // Consume '(' character. - if (CurVariant != -1) - report_fatal_error("Nested variants found in inline asm string: '" + - Twine(AsmStr) + "'"); - CurVariant = 0; // We're in the first variant now. - break; - case '|': - ++LastEmitted; // Consume '|' character. - if (CurVariant == -1) - OS << '|'; // This is gcc's behavior for | outside a variant. - else - ++CurVariant; // We're in the next variant. - break; - case ')': // $) -> same as GCC's } char. - ++LastEmitted; // Consume ')' character. - if (CurVariant == -1) - OS << '}'; // This is gcc's behavior for } outside a variant. - else - CurVariant = -1; - break; - } - if (Done) break; - - bool HasCurlyBraces = false; - if (*LastEmitted == '{') { // ${variable} - ++LastEmitted; // Consume '{' character. - HasCurlyBraces = true; - } - - // If we have ${:foo}, then this is not a real operand reference, it is a - // "magic" string reference, just like in .td files. Arrange to call - // PrintSpecial. - if (HasCurlyBraces && *LastEmitted == ':') { - ++LastEmitted; - const char *StrStart = LastEmitted; - const char *StrEnd = strchr(StrStart, '}'); - if (!StrEnd) - report_fatal_error("Unterminated ${:foo} operand in inline asm" - " string: '" + Twine(AsmStr) + "'"); - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart)); - LastEmitted = StrEnd+1; - break; - } - - const char *IDStart = LastEmitted; - const char *IDEnd = IDStart; - while (isDigit(*IDEnd)) - ++IDEnd; - - unsigned Val; - if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) - report_fatal_error("Bad $ operand number in inline asm string: '" + - Twine(AsmStr) + "'"); - LastEmitted = IDEnd; - - if (Val >= NumOperands - 1) - report_fatal_error("Invalid $ operand number in inline asm string: '" + - Twine(AsmStr) + "'"); - - char Modifier[2] = { 0, 0 }; - - if (HasCurlyBraces) { - // If we have curly braces, check for a modifier character. This - // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. - if (*LastEmitted == ':') { - ++LastEmitted; // Consume ':' character. - if (*LastEmitted == 0) - report_fatal_error("Bad ${:} expression in inline asm string: '" + - Twine(AsmStr) + "'"); - - Modifier[0] = *LastEmitted; - ++LastEmitted; // Consume modifier character. - } - - if (*LastEmitted != '}') - report_fatal_error("Bad ${} expression in inline asm string: '" + - Twine(AsmStr) + "'"); - ++LastEmitted; // Consume '}' character. - } - - // Okay, we finally have a value number. Ask the target to print this - // operand! - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = InlineAsm::MIOp_FirstOperand; - - bool Error = false; - - // Scan to find the machine operand number for the operand. 
- for (; Val; --Val) { - if (OpNo >= MI->getNumOperands()) - break; - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; - } - - // We may have a location metadata attached to the end of the - // instruction, and at no point should see metadata at any - // other point while processing. It's an error if so. - if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) { - Error = true; - } else { - unsigned OpFlags = MI->getOperand(OpNo).getImm(); - ++OpNo; // Skip over the ID number. - - // FIXME: Shouldn't arch-independent output template handling go into - // PrintAsmOperand? - // Labels are target independent. - if (MI->getOperand(OpNo).isBlockAddress()) { - const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); - MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); - Sym->print(OS, AP->MAI); - MMI->getContext().registerInlineAsmLabel(Sym); - } else if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand( - MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); - } else { - Error = AP->PrintAsmOperand(MI, OpNo, - Modifier[0] ? Modifier : nullptr, OS); - } - } - if (Error) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "invalid operand in inline asm: '" << AsmStr << "'"; - MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); - } - } - break; - } - } +static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, const MCAsmInfo *MAI, + AsmPrinter *AP, uint64_t LocCookie, + raw_ostream &OS) { + bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel; + + if (InputIsIntelDialect) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; } - OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. -} -static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, - MachineModuleInfo *MMI, const MCAsmInfo *MAI, - AsmPrinter *AP, uint64_t LocCookie, - raw_ostream &OS) { int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. unsigned NumOperands = MI->getNumOperands(); - int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); - if (MAI->getEmitGNUAsmStartIndentationMarker()) + int AsmPrinterVariant; + if (InputIsIntelDialect) + AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel. + else + AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant(); + + // FIXME: Should this happen for `asm inteldialect` as well? + if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker()) OS << '\t'; while (*LastEmitted) { @@ -340,8 +178,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, switch (*LastEmitted) { default: Done = false; break; case '$': // $$ -> $ - if (CurVariant == -1 || CurVariant == AsmPrinterVariant) - OS << '$'; + if (!InputIsIntelDialect) + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; ++LastEmitted; // Consume second '$' character. break; case '(': // $( -> same as GCC's { character. @@ -480,6 +319,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } } } + if (InputIsIntelDialect) + OS << "\n\t.att_syntax"; OS << '\n' << (char)0; // null terminate string. } @@ -515,9 +356,8 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { // it. 
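The two near-duplicate emitters removed here, EmitMSInlineAsmStr and EmitGCCInlineAsmStr, are merged into a single EmitInlineAsmStr that keys the few real differences off the instruction's dialect. Condensed from the new code in this hunk (the shared variant and $-operand parsing loop is elided), the dialect-dependent pieces reduce to roughly:

    bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
    if (InputIsIntelDialect)
      OS << "\t.intel_syntax\n\t"; // switch the assembler to Intel syntax
    int AsmPrinterVariant =
        InputIsIntelDialect ? 1 // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel
                            : MMI->getTarget().unqualifiedInlineAsmVariant();
    if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
      OS << '\t';
    // ... shared template parsing runs here for both dialects ...
    if (InputIsIntelDialect)
      OS << "\n\t.att_syntax"; // restore AT&T syntax before the trailing NUL

Note that the old Intel path consumed "$$" without emitting a "$", and the merged code preserves that quirk by guarding the emission with !InputIsIntelDialect.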
uint64_t LocCookie = 0; const MDNode *LocMD = nullptr; - for (unsigned i = MI->getNumOperands(); i != 0; --i) { - if (MI->getOperand(i-1).isMetadata() && - (LocMD = MI->getOperand(i-1).getMetadata()) && + for (const MachineOperand &MO : llvm::reverse(MI->operands())) { + if (MO.isMetadata() && (LocMD = MO.getMetadata()) && LocMD->getNumOperands() != 0) { if (const ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { @@ -533,10 +373,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { raw_svector_ostream OS(StringData); AsmPrinter *AP = const_cast<AsmPrinter*>(this); - if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) - EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); - else - EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); + EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as // that might lead to undefined behaviour. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a36d2966d44a..9b73f0ab2f05 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -521,8 +521,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { } // Construct a DIE for this scope. -void DwarfCompileUnit::constructScopeDIE( - LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) { +void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, + DIE &ParentScopeDIE) { if (!Scope || !Scope->getScopeNode()) return; @@ -533,46 +533,27 @@ void DwarfCompileUnit::constructScopeDIE( "constructSubprogramScopeDIE for non-inlined " "subprograms"); - SmallVector<DIE *, 8> Children; - - // We try to create the scope DIE first, then the children DIEs. This will - // avoid creating un-used children then removing them later when we find out - // the scope DIE is null. - DIE *ScopeDIE; + // Emit inlined subprograms. if (Scope->getParent() && isa<DISubprogram>(DS)) { - ScopeDIE = constructInlinedScopeDIE(Scope); + DIE *ScopeDIE = constructInlinedScopeDIE(Scope); if (!ScopeDIE) return; - // We create children when the scope DIE is not null. - createScopeChildrenDIE(Scope, Children); - } else { - // Early exit when we know the scope DIE is going to be null. - if (DD->isLexicalScopeDIENull(Scope)) - return; - - bool HasNonScopeChildren = false; - // We create children here when we know the scope DIE is not going to be - // null and the children will be added to the scope DIE. - createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren); - - // If there are only other scopes as children, put them directly in the - // parent instead, as this scope would serve no purpose. - if (!HasNonScopeChildren) { - FinalChildren.insert(FinalChildren.end(), - std::make_move_iterator(Children.begin()), - std::make_move_iterator(Children.end())); - return; - } - ScopeDIE = constructLexicalScopeDIE(Scope); - assert(ScopeDIE && "Scope DIE should not be null."); + ParentScopeDIE.addChild(ScopeDIE); + createAndAddScopeChildren(Scope, *ScopeDIE); + return; } - // Add children - for (auto &I : Children) - ScopeDIE->addChild(std::move(I)); + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + // Emit lexical blocks. 
+ DIE *ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); - FinalChildren.push_back(std::move(ScopeDIE)); + ParentScopeDIE.addChild(ScopeDIE); + createAndAddScopeChildren(Scope, *ScopeDIE); } void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, @@ -1022,42 +1003,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { return Result; } -DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &Children, - bool *HasNonScopeChildren) { - assert(Children.empty()); - DIE *ObjectPointer = nullptr; - - // Emit function arguments (order is significant). - auto Vars = DU->getScopeVariables().lookup(Scope); - for (auto &DV : Vars.Args) - Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); - - // Emit local variables. - auto Locals = sortLocalVars(Vars.Locals); - for (DbgVariable *DV : Locals) - Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); - - // Skip imported directives in gmlt-like data. - if (!includeMinimalInlineScopes()) { - // There is no need to emit empty lexical block DIE. - for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) - Children.push_back( - constructImportedEntityDIE(cast<DIImportedEntity>(IE))); - } - - if (HasNonScopeChildren) - *HasNonScopeChildren = !Children.empty(); - - for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) - Children.push_back(constructLabelDIE(*DL, *Scope)); - - for (LexicalScope *LS : Scope->getChildren()) - constructScopeDIE(LS, Children); - - return ObjectPointer; -} - DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) { DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); @@ -1088,13 +1033,48 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE) { - // We create children when the scope DIE is not null. - SmallVector<DIE *, 8> Children; - DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + DIE *ObjectPointer = nullptr; + + // Emit function arguments (order is significant). + auto Vars = DU->getScopeVariables().lookup(Scope); + for (auto &DV : Vars.Args) + ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); + + // Emit local variables. + auto Locals = sortLocalVars(Vars.Locals); + for (DbgVariable *DV : Locals) + ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); + + // Emit imported entities (skipped in gmlt-like data). + if (!includeMinimalInlineScopes()) { + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE))); + } + + // Emit labels. + for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) + ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); - // Add children - for (auto &I : Children) - ScopeDIE.addChild(std::move(I)); + // Emit inner lexical scopes. + auto needToEmitLexicalScope = [this](LexicalScope *LS) { + if (isa<DISubprogram>(LS->getScopeNode())) + return true; + auto Vars = DU->getScopeVariables().lookup(LS); + if (!Vars.Args.empty() || !Vars.Locals.empty()) + return true; + if (!includeMinimalInlineScopes() && + !ImportedEntities[LS->getScopeNode()].empty()) + return true; + return false; + }; + for (LexicalScope *LS : Scope->getChildren()) { + // If the lexical block doesn't have non-scope children, skip + // its emission and put its children directly to the parent scope. 
+ if (needToEmitLexicalScope(LS)) + constructScopeDIE(LS, ScopeDIE); + else + createAndAddScopeChildren(LS, ScopeDIE); + } return ObjectPointer; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 6e9261087686..fb03982b5e4a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -191,8 +191,7 @@ public: /// variables. DIE &updateSubprogramScopeDIE(const DISubprogram *SP); - void constructScopeDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &FinalChildren); + void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE); /// A helper function to construct a RangeSpanList for a given /// lexical scope. @@ -220,11 +219,6 @@ public: /// Construct a DIE for the given DbgLabel. DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope); - /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(LexicalScope *Scope, - SmallVectorImpl<DIE *> &Children, - bool *HasNonScopeChildren = nullptr); - void createBaseTypeDIEs(); /// Construct a DIE for this subprogram scope. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 150f19324834..39f40b172c1b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -162,9 +162,7 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { bool MarkedNoUnwind = false; bool SawFunc = false; - for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI->getOperand(I); - + for (const MachineOperand &MO : MI->operands()) { if (!MO.isGlobal()) continue; const Function *F = dyn_cast<Function>(MO.getGlobal()); @@ -386,8 +384,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { SmallVector<const LandingPadInfo *, 64> LandingPads; LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); + for (const LandingPadInfo &LPI : PadInfos) + LandingPads.push_back(&LPI); // Order landing pads lexicographically by type id. llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 5ac8f49a9522..64dadc82b48b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -1013,8 +1013,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // If this is a large problem, avoid visiting the same basic blocks // multiple times. if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); + for (const MergePotentialsElt &Elt : MergePotentials) + TriedMerging.insert(Elt.getBlock()); // See if we can do any tail merging on those. 
if (MergePotentials.size() >= 2) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp index 50825ccf9bac..eda0f37fdeb7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -513,9 +513,7 @@ bool BranchRelaxation::relaxBranchInstructions() { // Relaxing branches involves creating new basic blocks, so re-eval // end() for termination. - for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) { - MachineBasicBlock &MBB = *I; - + for (MachineBasicBlock &MBB : *MF) { // Empty block? MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr(); if (Last == MBB.end()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp index e0e2db9f4725..bbdd8aab502e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp @@ -58,8 +58,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); initializeLowerIntrinsicsPass(Registry); + initializeMIRAddFSDiscriminatorsPass(Registry); initializeMIRCanonicalizerPass(Registry); initializeMIRNamerPass(Registry); + initializeMIRProfileLoaderPassPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp index a1ff02178ffa..3bed81d5841d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp @@ -90,7 +90,7 @@ CGOPT(bool, EnableAddrsig) CGOPT(bool, EmitCallSiteInfo) CGOPT(bool, EnableMachineFunctionSplitter) CGOPT(bool, EnableDebugEntryValues) -CGOPT(bool, ValueTrackingVariableLocations) +CGOPT_EXP(bool, ValueTrackingVariableLocations) CGOPT(bool, ForceDwarfFrameSection) CGOPT(bool, XRayOmitFunctionIndex) CGOPT(bool, DebugStrictDwarf) @@ -534,12 +534,17 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) { Options.EmitAddrsig = getEnableAddrsig(); Options.EmitCallSiteInfo = getEmitCallSiteInfo(); Options.EnableDebugEntryValues = getEnableDebugEntryValues(); - Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations(); Options.ForceDwarfFrameSection = getForceDwarfFrameSection(); Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex(); Options.DebugStrictDwarf = getDebugStrictDwarf(); Options.LoopAlignment = getAlignLoops(); + if (auto Opt = getExplicitValueTrackingVariableLocations()) + Options.ValueTrackingVariableLocations = *Opt; + else + Options.ValueTrackingVariableLocations = + getDefaultValueTrackingVariableLocations(TheTriple); + Options.MCOptions = mc::InitMCTargetOptionsFromFlags(); Options.ThreadModel = getThreadModel(); @@ -692,3 +697,9 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features, for (Function &F : M) setFunctionAttributes(CPU, Features, F); } + +bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) { + if (T.getArch() == llvm::Triple::x86_64) + return true; + return false; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 981f5973fee8..4e98d49206b5 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -370,9 +370,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, // Handle cases in which this instruction defines NewReg. MachineInstr *MI = RefOper->getParent(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &CheckOper = MI->getOperand(i); - + for (const MachineOperand &CheckOper : MI->operands()) { if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) return true; @@ -462,11 +460,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // Find the node at the bottom of the critical path. const SUnit *Max = nullptr; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - const SUnit *SU = &SUnits[i]; - MISUnitMap[SU->getInstr()] = SU; - if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) - Max = SU; + for (const SUnit &SU : SUnits) { + MISUnitMap[SU.getInstr()] = &SU; + if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency) + Max = &SU; } assert(Max && "Failed to find bottom of the critical path"); @@ -621,8 +618,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits, // is invalid. If the instruction defines other registers, // save a list of them so that we don't pick a new register // that overlaps any of them. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index c6c0b79cd7e7..0bb186a02416 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -76,8 +76,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { return false; // Examine each operand. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -87,7 +86,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } else { if (MO.isDead()) { #ifndef NDEBUG - // Sanity check on uses of this dead register. All of them should be + // Baisc check on the register. All of them should be // 'undef'. for (auto &U : MRI->use_nodbg_operands(Reg)) assert(U.isUndef() && "'Undef' use on a 'dead' register is found!"); @@ -152,8 +151,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg defs. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -171,8 +169,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg uses, after the defs, in case a physreg is // both defined and used in the same instruction. 
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isUse()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 3a52959d54bf..755b3b844570 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" @@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI, Builder.setInstrAndDebugLoc(MI); auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI); NewPhi.addDef(DstReg); - for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) { - auto &MO = MI.getOperand(SrcIdx); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { if (!MO.isReg()) { NewPhi.addMBB(MO.getMBB()); continue; @@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector( unsigned NumElts = DstTy.getNumElements(); SmallBitVector ExtractedElts(NumElts); - for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg), - MRI.use_instr_nodbg_end())) { + for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) { if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT) return false; auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI); @@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase( MatchInfo(Builder); } +bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI, + BuildFnTy &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_OR); + + Register Dst = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(Dst); + unsigned BitWidth = Ty.getScalarSizeInBits(); + + Register ShlSrc, ShlAmt, LShrSrc, LShrAmt; + unsigned FshOpc = 0; + + // Match (or (shl x, amt), (lshr y, sub(bw, amt))). + if (mi_match( + Dst, MRI, + // m_GOr() handles the commuted version as well. + m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)), + m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(LShrAmt)))))) { + FshOpc = TargetOpcode::G_FSHL; + + // Match (or (shl x, sub(bw, amt)), (lshr y, amt)). + } else if (mi_match(Dst, MRI, + m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)), + m_GShl(m_Reg(ShlSrc), + m_GSub(m_SpecificICstOrSplat(BitWidth), + m_Reg(ShlAmt)))))) { + FshOpc = TargetOpcode::G_FSHR; + + } else { + return false; + } + + if (ShlAmt != LShrAmt) + return false; + + LLT AmtTy = MRI.getType(ShlAmt); + if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt}); + }; + return true; +} + /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate. bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); @@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd( bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) { unsigned Opc = MI.getOpcode(); assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO); - // Check for a constant 2 or a splat of 2 on the RHS. 
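The new matchOrShiftToFunnelShift combine added above rewrites (or (shl x, amt), (lshr y, sub(bw, amt))) to G_FSHL and the mirrored pattern to G_FSHR. A self-contained check of the underlying bit arithmetic, independent of GlobalISel, with Amt kept in [1, bw-1] to avoid the undefined shift by the full bit width:

    #include <cassert>
    #include <cstdint>

    // 64-bit reference: concatenate X:Y, shift, then take one 32-bit half.
    static uint32_t fshl_ref(uint32_t X, uint32_t Y, unsigned Amt) {
      uint64_t Cat = (uint64_t(X) << 32) | Y;
      return uint32_t((Cat << Amt) >> 32); // high word after shifting left
    }
    static uint32_t fshr_ref(uint32_t X, uint32_t Y, unsigned Amt) {
      uint64_t Cat = (uint64_t(X) << 32) | Y;
      return uint32_t(Cat >> Amt);         // low word after shifting right
    }

    int main() {
      const uint32_t X = 0x12345678u, Y = 0x9ABCDEF0u;
      for (unsigned Amt = 1; Amt < 32; ++Amt) {
        // (or (shl x, amt), (lshr y, sub(bw, amt))) == fshl(x, y, amt)
        assert(((X << Amt) | (Y >> (32 - Amt))) == fshl_ref(X, Y, Amt));
        // (or (shl x, sub(bw, amt)), (lshr y, amt)) == fshr(x, y, amt)
        assert(((X << (32 - Amt)) | (Y >> Amt)) == fshr_ref(X, Y, Amt));
      }
      return 0;
    }

The 64-bit reference makes the identity visible: fshl takes the high half of the shifted concatenation x:y, and fshr takes the low half.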
- auto RHS = MI.getOperand(3).getReg(); - bool IsVector = MRI.getType(RHS).isVector(); - if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2))) - return false; - if (IsVector) { - // FIXME: There's no mi_match pattern for this yet. - auto *RHSDef = getDefIgnoringCopies(RHS, MRI); - if (!RHSDef) - return false; - auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI); - if (!Splat || *Splat != 2) - return false; - } + + if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2))) + return false; MatchInfo = [=, &MI](MachineIRBuilder &B) { Observer.changingInstr(MI); @@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either +/// due to global flags or MachineInstr flags. +static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { + if (MI.getOpcode() != TargetOpcode::G_FMUL) + return false; + return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract); +} + +static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, + const MachineRegisterInfo &MRI) { + return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()) > + std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()), + MRI.use_instr_nodbg_end()); +} + +bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI, + bool &AllowFusionGlobally, + bool &HasFMAD, bool &Aggressive, + bool CanReassociate) { + + auto *MF = MI.getMF(); + const auto &TLI = *MF->getSubtarget().getTargetLowering(); + const TargetOptions &Options = MF->getTarget().Options; + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + if (CanReassociate && + !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc))) + return false; + + // Floating-point multiply-add with intermediate rounding. + HasFMAD = (LI && TLI.isFMADLegal(MI, DstType)); + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) && + isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}}); + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return false; + + AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || + Options.UnsafeFPMath || HasFMAD; + // If the addition is not contractable, do not combine. + if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract)) + return false; + + Aggressive = TLI.enableAggressiveFMAFusion(DstType); + return true; +} + +bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. 
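The long block of new match functions starting here implements the classic contraction folds such as (fadd (fmul x, y), z) -> (fma x, y, z). As the comments above note, G_FMA has no intermediate rounding, so the combine is gated on contract/unsafe-math flags because fusing can change results. A small standalone demonstration of that rounding difference, assuming IEEE-754 binary32 floats; the named intermediate P is there so a typical compiler does not contract the separate version itself:

    #include <cmath>
    #include <cstdio>

    int main() {
      float X = 1.0f + 0x1p-12f;            // 1 + 2^-12 (C++17 hex literal)
      float P = X * X;                      // rounding drops the 2^-24 term
      float Separate = P - 1.0f;            // == 2^-11
      float Fused = std::fmaf(X, X, -1.0f); // one rounding: 2^-11 + 2^-24
      std::printf("separate=%a fused=%a\n", Separate, Fused);
      return 0;
    }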
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), + RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + if (isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(), + LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + MachineInstr *FpExtSrc; + if (mi_match(LHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()}); + }; + return true; + } + + // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z) + // Note: Commutes FADD operands. 
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, + m_GFPExt(m_MInstr(FpExtSrc))) && + isContractableFMul(*FpExtSrc, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FpExtSrc->getOperand(1).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg()); + auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg()); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + MachineInstr *FMA = nullptr; + Register Z; + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) { + FMA = LHS; + Z = RHS->getOperand(0).getReg(); + } + // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z)) + else if (RHS->getOpcode() == PreferredFusedOpcode && + (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() == + TargetOpcode::G_FMUL) && + MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) && + MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) { + Z = LHS->getOperand(0).getReg(); + FMA = RHS; + } + + if (FMA) { + MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg()); + Register X = FMA->getOperand(1).getReg(); + Register Y = FMA->getOperand(2).getReg(); + Register U = FMulMI->getOperand(1).getReg(); + Register V = FMulMI->getOperand(2).getReg(); + + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register InnerFMA = MRI.createGenericVirtualRegister(DstTy); + B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z}); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FADD); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + if (!Aggressive) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + + 
unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally)) { + if (hasMoreUses(*LHS, *RHS, MRI)) + std::swap(LHS, RHS); + } + + // Builds: (fma x, y, (fma (fpext u), (fpext v), z)) + auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X, + Register Y, MachineIRBuilder &B) { + Register FpExtU = B.buildFPExt(DstType, U).getReg(0); + Register FpExtV = B.buildFPExt(DstType, V).getReg(0); + Register InnerFMA = + B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z}) + .getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {X, Y, InnerFMA}); + }; + + MachineInstr *FMulMI, *FMAMI; + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (LHS->getOpcode() == PreferredFusedOpcode && + mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(), + LHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + RHS->getOperand(0).getReg(), X, Y, B); + }; + + return true; + } + } + + // fold (fadd z, (fma x, y, (fpext (fmul u, v))) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + if (RHS->getOpcode() == PreferredFusedOpcode && + mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(), + RHS->getOperand(2).getReg(), B); + }; + return true; + } + + // fold (fadd z, (fpext (fma x, y, (fmul u, v))) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) && + FMAMI->getOpcode() == PreferredFusedOpcode) { + MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg()); + if (isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType, + MRI.getType(FMAMI->getOperand(0).getReg()))) { + MatchInfo = [=](MachineIRBuilder &B) { + Register X = FMAMI->getOperand(1).getReg(); + Register Y = FMAMI->getOperand(2).getReg(); + X = B.buildFPExt(DstType, X).getReg(0); + Y = B.buildFPExt(DstType, Y).getReg(0); + buildMatchInfo(FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), + LHS->getOperand(0).getReg(), X, Y, B); + }; + return true; + } + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg()); + MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg()); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + int FirstMulHasFewerUses = true; + if (isContractableFMul(*LHS, AllowFusionGlobally) && + isContractableFMul(*RHS, AllowFusionGlobally) && + hasMoreUses(*LHS, *RHS, MRI)) + FirstMulHasFewerUses = false; + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + // fold (fsub (fmul x, y), z) -> (fma x, y, -z) + if (FirstMulHasFewerUses && + (isContractableFMul(*LHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ}); + }; + return true; + } + // fold (fsub x, (fmul y, z)) -> (fma -y, z, x) + else if ((isContractableFMul(*RHS, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0); + B.buildInstr( + PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? 
TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register NegX = + B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegX, FMulMI->getOperand(2).getReg(), NegZ}); + }; + return true; + } + + // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x) + if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) && + (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) && + MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) && + isContractableFMul(*FMulMI, AllowFusionGlobally)) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + Register LHSReg = MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) + if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtX = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {FpExtX, FpExtY, NegZ}); + }; + return true; + } + + // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x) + if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FpExtY = + B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0); + Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0); + Register FpExtZ = + B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0); + B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()}, + {NegY, FpExtZ, LHSReg}); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA( + MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + bool AllowFusionGlobally, HasFMAD, Aggressive; + if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive)) + return false; + + const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + Register LHSReg = 
MI.getOperand(1).getReg(); + Register RHSReg = MI.getOperand(2).getReg(); + + unsigned PreferredFusedOpcode = + HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA; + + auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z, + MachineIRBuilder &B) { + Register FpExtX = B.buildFPExt(DstTy, X).getReg(0); + Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0); + B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z}); + }; + + MachineInstr *FMulMI; + // fold (fsub (fpext (fneg (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + // fold (fsub (fneg (fpext (fmul x, y))), z) -> + // (fneg (fma (fpext x), (fpext y), z)) + if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + Register FMAReg = MRI.createGenericVirtualRegister(DstTy); + buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), RHSReg, B); + B.buildFNeg(MI.getOperand(0).getReg(), FMAReg); + }; + return true; + } + + // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x) + if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) || + mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) && + isContractableFMul(*FMulMI, AllowFusionGlobally) && + TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy, + MRI.getType(FMulMI->getOperand(0).getReg()))) { + MatchInfo = [=, &MI](MachineIRBuilder &B) { + buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(), + FMulMI->getOperand(2).getReg(), LHSReg, B); + }; + return true; + } + + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c74bec7dfc0d..e09cd26eb0c1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, // FIXME: What does the original arg index mean here? SmallVector<CallLowering::ArgInfo, 3> Args; - for (unsigned i = 1; i < MI.getNumOperands(); i++) - Args.push_back({MI.getOperand(i).getReg(), OpType, 0}); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + Args.push_back({MO.getReg(), OpType, 0}); return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType, 0}, Args); } @@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideDstTy = LLT::scalar(NumMerge * WideSize); // Decompose the original operands if they don't evenly divide. 
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + Register SrcReg = MO.getReg(); if (GCD == SrcSize) { Unmerges.push_back(SrcReg); } else { @@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, // Break into a common type SmallVector<Register, 16> Parts; - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) - extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg()); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + extractGCDType(Parts, GCDTy, MO.getReg()); // Build the requested new merge, padding with undef. LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, @@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, // of that value loaded. This can result in a sequence of loads and stores // mixed types, depending on what the target specifies as good types to use. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); unsigned Size = KnownLen; for (auto CopyTy : MemOps) { // Issuing an unaligned load / store pair that overlaps with the previous @@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, Register LoadPtr = Src; Register Offset; if (CurrOffset != 0) { - Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + LLT SrcTy = MRI.getType(Src); + Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); // Create the store. - Register StorePtr = - CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + Register StorePtr = Dst; + if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); + } MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); Size -= CopyTy.getSizeInBytes(); @@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Apart from that, this loop is pretty much doing the same thing as the // memcpy codegen function. unsigned CurrOffset = 0; - LLT PtrTy = MRI.getType(Src); SmallVector<Register, 16> LoadVals; for (auto CopyTy : MemOps) { // Construct MMO for the load. @@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, // Create the load. 
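Aside (illustrative, not from the diff): the lowerMemcpy hunk above stops reusing one cached PtrTy for both pointers and derives the type from Src and Dst separately, which is what makes copies between different address spaces come out right. The loop shape itself is a chunked copy; a plain-C++ analog, with invented chunk sizes standing in for the target's MemOps list and no MMO/alignment/overlap handling:

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    static void chunkedCopy(unsigned char *Dst, const unsigned char *Src,
                            std::size_t Len) {
      const std::size_t Chunks[] = {8, 4, 2, 1}; // assumed "legal" widths
      std::size_t Off = 0;
      for (std::size_t C : Chunks)
        while (Len - Off >= C) {
          // One load/store pair of width C; the real lowering builds
          // G_LOAD/G_STORE whose pointer types come from Src and Dst
          // independently.
          std::memcpy(Dst + Off, Src + Off, C);
          Off += C;
        }
    }

    int main() {
      const char Msg[] = "hello, memcpy lowering";
      char Buf[sizeof Msg] = {};
      chunkedCopy(reinterpret_cast<unsigned char *>(Buf),
                  reinterpret_cast<const unsigned char *>(Msg), sizeof Msg);
      std::puts(Buf);
    }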
Register LoadPtr = Src; if (CurrOffset != 0) { + LLT SrcTy = MRI.getType(Src); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, Register StorePtr = Dst; if (CurrOffset != 0) { + LLT DstTy = MRI.getType(Dst); auto Offset = - MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); + MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 1a2102e3ef21..650500c7eb31 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( Register Reg = MI.getOperand(OpIdx).getReg(); const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); - // Sanity check that the target properly implemented getRegBankFromRegClass. + // Check that the target properly implemented getRegBankFromRegClass. assert(RegBank.covers(*RC) && "The mapping of the register bank does not make sense"); return &RegBank; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 1a440c064a59..b0b84763e922 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, case TargetOpcode::G_BUILD_VECTOR: { // TODO: Probably should have a recursion depth guard since you could have // bitcasted vector elements. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB)) + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) + if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB)) return false; - } return true; } @@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI, // Only handle constants since we would need to know if number of leading // zeros is greater than the truncation amount. 
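Aside (illustrative, not from the diff): the G_TRUNC case above only accepts constants because a power of two stays a power of two across truncation only if its set bit survives the narrowing; that is exactly what zextOrTrunc(BitWidth).isPowerOf2() tests. Restated as standalone C++:

    #include <cstdint>
    #include <cstdio>

    static bool isPow2AfterTrunc(uint64_t C, unsigned BitWidth) {
      uint64_t Mask = BitWidth >= 64 ? ~0ULL : ((1ULL << BitWidth) - 1);
      uint64_t T = C & Mask;               // zextOrTrunc to BitWidth
      return T != 0 && (T & (T - 1)) == 0; // isPowerOf2
    }

    int main() {
      std::printf("%d\n", isPow2AfterTrunc(0x100, 16));   // 1: bit 8 survives
      std::printf("%d\n", isPow2AfterTrunc(0x10000, 16)); // 0: truncated away
    }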
const unsigned BitWidth = Ty.getScalarSizeInBits(); - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI); + for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) { + auto Const = getIConstantVRegVal(MO.getReg(), MRI); if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2()) return false; } @@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg, return SplatValAndReg; } -bool isBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI, - int64_t SplatValue, bool AllowUndef) { - if (auto SplatValAndReg = - getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef)) +} // end anonymous namespace + +bool llvm::isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef)) return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue)); return false; } -} // end anonymous namespace +bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef) { + return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue, + AllowUndef); +} Optional<int64_t> llvm::getBuildVectorConstantSplat(const MachineInstr &MI, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp index 6c1ce4c1efb0..bbd9006a5d8c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp @@ -399,8 +399,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, // having a single global, but is aggressive enough for any other case. if (GlobalMergeIgnoreSingleUse) { BitVector AllGlobals(Globals.size()); - for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { - const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) { if (UGS.UsageCount == 0) continue; if (UGS.Globals.count() > 1) @@ -418,8 +417,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, BitVector PickedGlobals(Globals.size()); bool Changed = false; - for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { - const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) { if (UGS.UsageCount == 0) continue; if (PickedGlobals.anyCommon(UGS.Globals)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index e4606daba352..2d38a44d5a33 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -260,10 +260,12 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) { if (DTU) { // If there were multiple indirectbr's, they may have common successors, // but in the dominator tree, we only track unique edges. 
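Aside (illustrative, not from the diff): the IndirectBrExpand fix that follows swaps "build a SmallPtrSet, then iterate the set" for "iterate the original list, use the set only as a seen-filter". Both deduplicate, but only the second keeps a deterministic update order, since pointer-set iteration order is unstable. The idiom in miniature:

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> BBs = {3, 1, 3, 2, 1}; // successors, with repeats
      std::set<int> Seen;
      std::vector<int> Updates;
      for (int BB : BBs)
        if (Seen.insert(BB).second) // true only the first time BB is seen
          Updates.push_back(BB);
      for (int U : Updates)
        std::printf("%d ", U); // 3 1 2 -- input order preserved
      std::printf("\n");
    }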
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end()); - Updates.reserve(Updates.size() + UniqueSuccessors.size()); - for (BasicBlock *BB : UniqueSuccessors) - Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + SmallPtrSet<BasicBlock *, 8> UniqueSuccessors; + Updates.reserve(Updates.size() + BBs.size()); + for (BasicBlock *BB : BBs) { + if (UniqueSuccessors.insert(BB).second) + Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + } DTU->applyUpdates(Updates); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index 64e1f4351456..fc5ac45752ca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ -274,11 +274,9 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) { } static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) { - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + for (const MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg())) LIS.getInterval(MO.getReg()); - } } /// isSnippet - Identify if a live interval is a snippet that should be spilled. @@ -583,11 +581,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { if (!ParentVNI) { LLVM_DEBUG(dbgs() << "\tadding <undef> flags: "); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg()) MO.setIsUndef(); - } LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI); return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp index c3e0553418a5..fab6b8d10a33 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -73,11 +73,9 @@ void LatencyPriorityQueue::push(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &Succ : SU->Succs) + if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) ++NumNodesBlocking; - } NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; Queue.push_back(SU); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index a4eb3094612b..cf62b0e5d7e8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -544,8 +544,7 @@ public: // Re-state the variable location: if there's no replacement then NewLoc // is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE // identifying the alternative location will be emitted. 
- const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr; - DbgValueProperties Properties(Expr, false); + const DbgValueProperties &Properties = ActiveVLocIt->second.Properties; PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties)); // Update machine locations <=> variable locations maps. Defer updating @@ -836,6 +835,15 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, unsigned Base = Spill.SpillBase; MIB.addReg(Base); MIB.addImm(0); + + // Being on the stack makes this location indirect; if it was _already_ + // indirect though, we need to add extra indirection. See this test for + // a scenario where this happens: + // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll + if (Properties.Indirect) { + std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; + Expr = DIExpression::append(Expr, Elts); + } } else { // This is a stack location with a weird subregister offset: emit an undef // DBG_VALUE instead. @@ -1288,6 +1296,24 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { } else if (MI.isMetaInstruction()) return; + // We always ignore SP defines on call instructions, they don't actually + // change the value of the stack pointer... except for win32's _chkstk. This + // is rare: filter quickly for the common case (no stack adjustments, not a + // call, etc). If it is a call that modifies SP, recognise the SP register + // defs. + bool CallChangesSP = false; + if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() && + !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data())) + CallChangesSP = true; + + // Test whether we should ignore a def of this register due to it being part + // of the stack pointer. + auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool { + if (CallChangesSP) + return false; + return MI.isCall() && MTracker->SPAliases.count(R); + }; + // Find the regs killed by MI, and find regmasks of preserved regs. // Max out the number of statically allocated elements in `DeadRegs`, as this // prevents fallback to std::set::count() operations. @@ -1298,7 +1324,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { // Determine whether the operand is a register def. if (MO.isReg() && MO.isDef() && MO.getReg() && Register::isPhysicalRegister(MO.getReg()) && - !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) { + !IgnoreSPAlias(MO.getReg())) { // Remove ranges of all aliased registers. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) // FIXME: Can we break out of this loop early if no insertion occurs? @@ -1347,6 +1373,9 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) { continue; Register Reg = MTracker->LocIdxToLocID[L.Idx]; + if (IgnoreSPAlias(Reg)) + continue; + for (auto *MO : RegMaskPtrs) if (MO->clobbersPhysReg(Reg)) TTracker->clobberMloc(L.Idx, MI.getIterator(), false); @@ -1628,9 +1657,10 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { /// fragments of that DILocalVariable which overlap. This reduces work during /// the data-flow stage from "Find any overlapping fragments" to "Check if the /// known-to-overlap fragments are present". -/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for +/// \param MI A previously unprocessed debug instruction to analyze for /// fragment usage. 
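Editor's sketch, not from the commit: the IgnoreSPAlias lambda above distills to "SP defs on ordinary calls are ignored, because calls don't really move the stack pointer -- unless the call is the target's stack-probe function (win32 _chkstk), which does". With toy register numbers:

    #include <cstdio>
    #include <set>

    static bool ignoreSPDef(bool IsCall, bool CallIsStackProbe, int Reg,
                            const std::set<int> &SPAliases) {
      if (CallIsStackProbe)
        return false;                          // probe really modifies SP
      return IsCall && SPAliases.count(Reg);   // ordinary call: ignore SP def
    }

    int main() {
      std::set<int> SPAliases = {4}; // pretend register 4 aliases SP
      std::printf("%d\n", ignoreSPDef(true, false, 4, SPAliases)); // 1: ignored
      std::printf("%d\n", ignoreSPDef(true, true, 4, SPAliases));  // 0: tracked
    }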
void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) { + assert(MI.isDebugValue() || MI.isDebugRef()); DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); @@ -1732,7 +1762,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( for (auto &MI : MBB) { process(MI); // Also accumulate fragment map. - if (MI.isDebugValue()) + if (MI.isDebugValue() || MI.isDebugRef()) accumulateFragmentMap(MI); // Create a map from the instruction number (if present) to the @@ -2322,15 +2352,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( bool InstrRefBasedLDV::vlocJoin( MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, - SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, DbgValue &LiveIn) { - // To emulate VarLocBasedImpl, process this block if it's not in scope but - // _does_ assign a variable value. No live-ins for this scope are transferred - // in though, so we can return immediately. - if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) - return false; - LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -2466,11 +2489,10 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, // "blocks that are potentially in scope. See comment at start of vlocJoin. SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore; - // Old LiveDebugValues tracks variable locations that come out of blocks - // not in scope, where DBG_VALUEs occur. This is something we could - // legitimately ignore, but lets allow it for now. - if (EmulateOldLDV) - BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); + // VarLoc LiveDebugValues tracks variable locations that are defined in + // blocks not in scope. This is something we could legitimately ignore, but + // lets allow it for now for the sake of coverage. + BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end()); // We also need to propagate variable values through any artificial blocks // that immediately follow blocks in scope. @@ -2635,7 +2657,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, // Join values from predecessors. Updates LiveInIdx, and writes output // into JoinedInLocs. bool InLocsChanged = - vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn); + vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn); SmallVector<const MachineBasicBlock *, 8> Preds; for (const auto *Pred : MBB->predecessors()) @@ -2730,6 +2752,8 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc, continue; if (BlockLiveIn->Kind == DbgValue::VPHI) BlockLiveIn->Kind = DbgValue::Def; + assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() == + Var.getFragment() && "Fragment info missing during value prop"); Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn)); } } // Per-variable loop. 
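Editorial aside, not part of the commit: the fragment map accumulated here records which DIExpression fragments of one variable can shadow each other, and the underlying relation is plain interval overlap on bit ranges. A toy model:

    #include <cstdio>

    struct Fragment { unsigned OffsetInBits, SizeInBits; };

    static bool fragmentsOverlap(Fragment A, Fragment B) {
      return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
             B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
    }

    int main() {
      Fragment Lo{0, 32}, Hi{32, 32}, Mid{16, 32};
      std::printf("%d %d\n",
                  fragmentsOverlap(Lo, Hi),   // 0: disjoint halves
                  fragmentsOverlap(Lo, Mid)); // 1: bits 16..31 shared
    }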
@@ -2879,6 +2903,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MFI = &MF.getFrameInfo(); LS.initialize(MF); + const auto &STI = MF.getSubtarget(); + AdjustsStackInCalls = MFI->adjustsStack() && + STI.getFrameLowering()->stackProbeFunctionModifiesSP(); + if (AdjustsStackInCalls) + StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF); + MTracker = new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering()); VTracker = nullptr; @@ -2895,7 +2925,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, ++MaxNumBlocks; MLocTransfer.resize(MaxNumBlocks); - vlocs.resize(MaxNumBlocks); + vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr)); SavedLiveIns.resize(MaxNumBlocks); initialSetup(MF); @@ -3040,6 +3070,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, BBNumToRPO.clear(); DebugInstrNumToInstr.clear(); DebugPHINumToValue.clear(); + OverlapFragments.clear(); + SeenFragments.clear(); return Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index d96ef6d4f6e5..789205e61cdb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -655,6 +655,14 @@ public: const DbgValueProperties &Properties); }; +/// Types for recording sets of variable fragments that overlap. For a given +/// local variable, we record all other fragments of that variable that could +/// overlap it, to reduce search time. +using FragmentOfVar = + std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; +using OverlapMap = + DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + /// Collection of DBG_VALUEs observed when traversing a block. Records each /// variable and the value the DBG_VALUE refers to. Requires the machine value /// location dataflow algorithm to have run already, so that values can be @@ -672,9 +680,12 @@ public: MapVector<DebugVariable, DbgValue> Vars; DenseMap<DebugVariable, const DILocation *> Scopes; MachineBasicBlock *MBB = nullptr; + const OverlapMap &OverlappingFragments; + DbgValueProperties EmptyProperties; public: - VLocTracker() {} + VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr) + : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {} void defVar(const MachineInstr &MI, const DbgValueProperties &Properties, Optional<ValueIDNum> ID) { @@ -689,6 +700,8 @@ public: if (!Result.second) Result.first->second = Rec; Scopes[Var] = MI.getDebugLoc().get(); + + considerOverlaps(Var, MI.getDebugLoc().get()); } void defVar(const MachineInstr &MI, const MachineOperand &MO) { @@ -704,16 +717,37 @@ public: if (!Result.second) Result.first->second = Rec; Scopes[Var] = MI.getDebugLoc().get(); + + considerOverlaps(Var, MI.getDebugLoc().get()); } -}; -/// Types for recording sets of variable fragments that overlap. For a given -/// local variable, we record all other fragments of that variable that could -/// overlap it, to reduce search time. 
-using FragmentOfVar = - std::pair<const DILocalVariable *, DIExpression::FragmentInfo>; -using OverlapMap = - DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>; + void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) { + auto Overlaps = OverlappingFragments.find( + {Var.getVariable(), Var.getFragmentOrDefault()}); + if (Overlaps == OverlappingFragments.end()) + return; + + // Otherwise: terminate any overlapped variable locations. + for (auto FragmentInfo : Overlaps->second) { + // The "empty" fragment is stored as DebugVariable::DefaultFragment, so + // that it overlaps with everything, however its cannonical representation + // in a DebugVariable is as "None". + Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo; + if (DebugVariable::isDefaultFragment(FragmentInfo)) + OptFragmentInfo = None; + + DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo, + Var.getInlinedAt()); + DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef); + + // Attempt insertion; overwrite if it's already mapped. + auto Result = Vars.insert(std::make_pair(Overlapped, Rec)); + if (!Result.second) + Result.first->second = Rec; + Scopes[Overlapped] = Loc; + } + } +}; // XXX XXX docs class InstrRefBasedLDV : public LDVImpl { @@ -817,6 +851,16 @@ private: OverlapMap OverlapFragments; VarToFragments SeenFragments; + /// True if we need to examine call instructions for stack clobbers. We + /// normally assume that they don't clobber SP, but stack probes on Windows + /// do. + bool AdjustsStackInCalls = false; + + /// If AdjustsStackInCalls is true, this holds the name of the target's stack + /// probe function, which is the function we expect will alter the stack + /// pointer. + StringRef StackProbeSymbolName; + /// Tests whether this instruction is a spill to a stack slot. bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF); @@ -962,7 +1006,6 @@ private: /// \returns true if any live-ins change value, either from value propagation /// or PHI elimination. bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, - SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks, SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore, DbgValue &LiveIn); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index dcd546f9c6db..5f976bf43c5b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1875,34 +1875,57 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n"); - // Re-insert any debug instrs back in the position they were. Ordering - // is preserved by vector. We must re-insert in the same order to ensure that - // debug instructions don't swap, which could re-order assignments. - for (auto &P : StashedDebugInstrs) { - SlotIndex Idx = P.Idx; + // Re-insert any debug instrs back in the position they were. We must + // re-insert in the same order to ensure that debug instructions don't swap, + // which could re-order assignments. Do so in a batch -- once we find the + // insert position, insert all instructions at the same SlotIdx. They are + // guaranteed to appear in-sequence in StashedDebugInstrs because we insert + // them in order. 
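Aside (illustrative, not from the diff): the batching loop implemented just below relies on StashedDebugInstrs being sorted by SlotIndex, so each run of equal indexes can reuse a single insertion point instead of re-searching. A standalone analog with (index, id) pairs:

    #include <cstdio>
    #include <iterator>
    #include <utility>
    #include <vector>

    int main() {
      // Pre-sorted by index, as StashedDebugInstrs is.
      std::vector<std::pair<int, char>> Stash = {
          {10, 'a'}, {10, 'b'}, {12, 'c'}, {12, 'd'}, {15, 'e'}};
      for (auto It = Stash.begin(); It != Stash.end(); ++It) {
        std::printf("insert point for index %d: %c", It->first, It->second);
        auto Next = std::next(It);
        while (Next != Stash.end() && Next->first == It->first) {
          std::printf(" %c", Next->second); // same index: reuse insert point
          It = Next;                        // advance the outer cursor too
          Next = std::next(It);
        }
        std::printf("\n");
      }
    }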
+ for (auto StashIt = StashedDebugInstrs.begin(); + StashIt != StashedDebugInstrs.end(); ++StashIt) { + SlotIndex Idx = StashIt->Idx; + MachineBasicBlock *MBB = StashIt->MBB; + MachineInstr *MI = StashIt->MI; + + auto EmitInstsHere = [this, &StashIt, MBB, Idx, + MI](MachineBasicBlock::iterator InsertPos) { + // Insert this debug instruction. + MBB->insert(InsertPos, MI); + + // Look at subsequent stashed debug instructions: if they're at the same + // index, insert those too. + auto NextItem = std::next(StashIt); + while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) { + assert(NextItem->MBB == MBB && "Instrs with same slot index should be" + "in the same block"); + MBB->insert(InsertPos, NextItem->MI); + StashIt = NextItem; + NextItem = std::next(StashIt); + }; + }; // Start block index: find the first non-debug instr in the block, and // insert before it. - if (Idx == Slots->getMBBStartIdx(P.MBB)) { + if (Idx == Slots->getMBBStartIdx(MBB)) { MachineBasicBlock::iterator InsertPos = - findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap); - P.MBB->insert(InsertPos, P.MI); + findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap); + EmitInstsHere(InsertPos); continue; } if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) { // Insert at the end of any debug instructions. auto PostDebug = std::next(Pos->getIterator()); - PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end()); - P.MBB->insert(PostDebug, P.MI); + PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end()); + EmitInstsHere(PostDebug); } else { // Insert position disappeared; walk forwards through slots until we // find a new one. - SlotIndex End = Slots->getMBBEndIdx(P.MBB); + SlotIndex End = Slots->getMBBEndIdx(MBB); for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) { Pos = Slots->getInstructionFromIndex(Idx); if (Pos) { - P.MBB->insert(Pos->getIterator(), P.MI); + EmitInstsHere(Pos->getIterator()); break; } } @@ -1911,8 +1934,8 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { // insert! It's not safe to discard any debug instructions; place them // in front of the first terminator, or in front of end(). if (Idx >= End) { - auto TermIt = P.MBB->getFirstTerminator(); - P.MBB->insert(TermIt, P.MI); + auto TermIt = MBB->getFirstTerminator(); + EmitInstsHere(TermIt); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp index d91ff734ad8f..6380c4bfd6e6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -108,8 +108,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true)); - for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OrigMI->getOperand(i); + for (const MachineOperand &MO : OrigMI->operands()) { if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) continue; @@ -425,15 +424,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling // them results in incorrect code. 
- bool BeingSpilled = false; - for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { - if (VReg == RegsBeingSpilled[i]) { - BeingSpilled = true; - break; - } - } - - if (BeingSpilled) continue; + if (llvm::is_contained(RegsBeingSpilled, VReg)) + continue; // LI may have been separated, create new intervals. LI->RenumberValues(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h index dace05f1ad95..ada5c5be484a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeUtils.h @@ -18,7 +18,7 @@ namespace llvm { /// Helper function that distributes live range value numbers and the -/// corresponding segments of a master live range \p LR to a list of newly +/// corresponding segments of a primary live range \p LR to a list of newly /// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p /// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific /// live range in the \p SplitLRs array. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp index 51ba4b7e53eb..e8744797707b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveVariables.cpp @@ -58,9 +58,9 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineInstr * LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - if (Kills[i]->getParent() == MBB) - return Kills[i]; + for (MachineInstr *MI : Kills) + if (MI->getParent() == MBB) + return MI; return nullptr; } @@ -811,8 +811,8 @@ bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) { LiveVariables::VarInfo &VI = getVarInfo(Reg); SmallPtrSet<const MachineBasicBlock *, 8> Kills; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - Kills.insert(VI.Kills[i]->getParent()); + for (MachineInstr *MI : VI.Kills) + Kills.insert(MI->getParent()); // Loop over all of the successors of the basic block, checking to see if // the value is either live in the block, or if it is killed in the block. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 2e99c8595cbd..ee2387d1e8e6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -316,14 +316,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // than that, but the increased register pressure makes that a // tricky thing to balance. Investigate if re-materializing these // becomes an issue. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + for (const MachineOperand &MO : MI.operands()) { // Consider replacing all frame index operands that reference // an object allocated in the local block. - if (MI.getOperand(i).isFI()) { + if (MO.isFI()) { // Don't try this with values not in the local block. 
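Aside (illustrative, not from the diff): llvm::is_contained, used above to replace the hand-rolled BeingSpilled scan, is essentially std::find compared against end(). The same cleanup in plain C++:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> RegsBeingSpilled = {7, 9, 12};
      unsigned VReg = 9;
      bool BeingSpilled = std::find(RegsBeingSpilled.begin(),
                                    RegsBeingSpilled.end(),
                                    VReg) != RegsBeingSpilled.end();
      std::printf("%d\n", BeingSpilled); // 1
    }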
- if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex())) + if (!MFI.isObjectPreAllocated(MO.getIndex())) break; - int Idx = MI.getOperand(i).getIndex(); + int Idx = MO.getIndex(); int64_t LocalOffset = LocalOffsets[Idx]; if (!TRI->needsFrameBaseReg(&MI, LocalOffset)) break; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp index 90ecc6fc68fc..b742ad9823c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -314,6 +314,8 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) { } bool Changed = MIRSampleLoader->runOnFunction(MF); + if (Changed) + MBFI->calculate(MF, *MBFI->getMBPI(), *&getAnalysis<MachineLoopInfo>()); if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 366d06871245..310c2721c3bd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -1170,9 +1170,10 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI) void MachineFunction::finalizeDebugInstrRefs() { auto *TII = getSubtarget().getInstrInfo(); - auto MakeDbgValue = [&](MachineInstr &MI) { + auto MakeUndefDbgValue = [&](MachineInstr &MI) { const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE); MI.setDesc(RefII); + MI.getOperand(0).setReg(0); MI.getOperand(1).ChangeToRegister(0, false); }; @@ -1187,15 +1188,15 @@ void MachineFunction::finalizeDebugInstrRefs() { Register Reg = MI.getOperand(0).getReg(); // Some vregs can be deleted as redundant in the meantime. Mark those - // as DBG_VALUE $noreg. - if (Reg == 0) { - MakeDbgValue(MI); + // as DBG_VALUE $noreg. Additionally, some normal instructions are + // quickly deleted, leaving dangling references to vregs with no def. 
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) { + MakeUndefDbgValue(MI); continue; } assert(Reg.isVirtual()); MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg); - assert(RegInfo->hasOneDef(Reg)); // If we've found a copy-like instruction, follow it back to the // instruction that defines the source value, see salvageCopySSA docs @@ -1327,9 +1328,9 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, assert(Old != New && "Not making a change?"); bool MadeChange = false; MachineJumpTableEntry &JTE = JumpTables[Idx]; - for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) - if (JTE.MBBs[j] == Old) { - JTE.MBBs[j] = New; + for (MachineBasicBlock *&MBB : JTE.MBBs) + if (MBB == Old) { + MBB = New; MadeChange = true; } return MadeChange; @@ -1342,8 +1343,8 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { OS << printJumpTableEntryReference(i) << ':'; - for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) - OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]); + for (const MachineBasicBlock *MBB : JumpTables[i].MBBs) + OS << ' ' << printMBBReference(*MBB); if (i != e) OS << '\n'; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 5c4f75e9ceb9..aaa80432d2f2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -1490,12 +1490,10 @@ bool MachineInstr::allDefsAreDead() const { /// instruction to this instruction. void MachineInstr::copyImplicitOps(MachineFunction &MF, const MachineInstr &MI) { - for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands(); - i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : + llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands())) if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask()) addOperand(MF, MO); - } } bool MachineInstr::hasComplexRegisterTies() const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index 4d080e1a4f82..680dbe54ffaf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -1071,7 +1071,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { // The Value and Offset may differ due to CSE. But the flags and size // should be the same. assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); - assert(MMO->getSize() == getSize() && "Size mismatch!"); + assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) || + MMO->getSize() == getSize()) && + "Size mismatch!"); if (MMO->getBaseAlign() >= getBaseAlign()) { // Update the alignment value. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp index cfbccebaff3e..7783b5e0d3cc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp @@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction( F->addFnAttr(Attribute::OptimizeForSize); F->addFnAttr(Attribute::MinSize); - // Include target features from an arbitrary candidate for the outlined - // function. This makes sure the outlined function knows what kinds of - // instructions are going into it. 
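Editorial aside, not part of the commit: several hunks in this change (copyImplicitOps above, the legalizer and verifier loops elsewhere) lean on llvm::drop_begin from llvm/ADT/STLExtras.h, which is just a view starting at begin() + N. A simplified stand-in:

    #include <cstddef>
    #include <cstdio>
    #include <iterator>
    #include <vector>

    // Minimal stand-in for llvm::drop_begin: a view over [begin()+N, end()).
    template <typename It> struct RangeView {
      It B, E;
      It begin() const { return B; }
      It end() const { return E; }
    };
    template <typename C> auto dropBegin(C &Cont, std::size_t N) {
      return RangeView<decltype(Cont.begin())>{std::next(Cont.begin(), N),
                                               Cont.end()};
    }

    int main() {
      std::vector<int> Operands = {100, 1, 2, 3}; // operand 0 is the def
      for (int Op : dropBegin(Operands, 1))       // visit only the rest
        std::printf("%d ", Op);
      std::printf("\n"); // 1 2 3
    }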
This is fine, since all parent functions - // must necessarily support the instructions that are in the outlined region. Candidate &FirstCand = OF.Candidates.front(); - const Function &ParentFn = FirstCand.getMF()->getFunction(); - if (ParentFn.hasFnAttribute("target-features")) - F->addFnAttr(ParentFn.getFnAttribute("target-features")); + const TargetInstrInfo &TII = + *FirstCand.getMF()->getSubtarget().getInstrInfo(); - // Set nounwind, so we don't generate eh_frame. - if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) { - return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); - })) - F->addFnAttr(Attribute::NoUnwind); + TII.mergeOutliningCandidateAttributes(*F, OF.Candidates); BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); @@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction( MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); - const TargetSubtargetInfo &STI = MF.getSubtarget(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert the new function into the module. MF.insert(MF.begin(), &MBB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index e18318386def..8d6459a627fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1455,17 +1455,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { int asap = 0; int zeroLatencyDepth = 0; SUnit *SU = &SUnits[I]; - for (SUnit::const_pred_iterator IP = SU->Preds.begin(), - EP = SU->Preds.end(); - IP != EP; ++IP) { - SUnit *pred = IP->getSUnit(); - if (IP->getLatency() == 0) + for (const SDep &P : SU->Preds) { + SUnit *pred = P.getSUnit(); + if (P.getLatency() == 0) zeroLatencyDepth = std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1); - if (ignoreDependence(*IP, true)) + if (ignoreDependence(P, true)) continue; - asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() - - getDistance(pred, SU, *IP) * MII)); + asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() - + getDistance(pred, SU, P) * MII)); } maxASAP = std::max(maxASAP, asap); ScheduleInfo[I].ASAP = asap; @@ -1521,9 +1519,8 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, SmallSetVector<SUnit *, 8> &Preds, const NodeSet *S = nullptr) { Preds.clear(); - for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end(); - I != E; ++I) { - for (const SDep &Pred : (*I)->Preds) { + for (const SUnit *SU : NodeOrder) { + for (const SDep &Pred : SU->Preds) { if (S && S->count(Pred.getSUnit()) == 0) continue; if (ignoreDependence(Pred, true)) @@ -1532,7 +1529,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder, Preds.insert(Pred.getSUnit()); } // Back-edges are predecessors with an anti-dependence. 
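Aside (illustrative, not from the diff): the ASAP values computed above follow a longest-path recurrence over predecessors, asap(SU) = max(asap(pred) + latency - distance * MII). A toy version over a topologically ordered DAG, with all latencies, distances, and the MII invented for illustration:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Edge { int Pred, Latency, Distance; };

    int main() {
      const int MII = 2;
      // Preds[i] lists incoming edges of node i; nodes are in topo order.
      std::vector<std::vector<Edge>> Preds = {
          {},                     // node 0
          {{0, 1, 0}},            // node 1 depends on 0, latency 1
          {{1, 3, 0}, {0, 1, 1}}, // node 2; second edge loop-carried (dist 1)
      };
      std::vector<int> ASAP(Preds.size(), 0);
      for (std::size_t I = 0; I < Preds.size(); ++I)
        for (const Edge &E : Preds[I])
          ASAP[I] =
              std::max(ASAP[I], ASAP[E.Pred] + E.Latency - E.Distance * MII);
      for (int A : ASAP)
        std::printf("%d ", A); // 0 1 4
      std::printf("\n");
    }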
- for (const SDep &Succ : (*I)->Succs) { + for (const SDep &Succ : SU->Succs) { if (Succ.getKind() != SDep::Anti) continue; if (S && S->count(Succ.getSUnit()) == 0) @@ -2546,8 +2543,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, unsigned Pos = 0; for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E; ++I, ++Pos) { - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 30745c7a5583..54c478645dcf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -596,8 +596,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); @@ -789,8 +788,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, // If this instruction is inside a loop and sinking this instruction can make // more registers live range shorten, it is still prifitable. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. if (!MO.isReg()) continue; @@ -889,8 +887,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = nullptr; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; // Ignore non-register operands. Register Reg = MO.getReg(); @@ -1322,8 +1319,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. 
(<rdar://problem/8030636>) - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isUse()) continue; Register Reg = MO.getReg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index d6bb3e7c9e58..32078db76cf3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1276,11 +1276,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (DstTy.getNumElements() != MI->getNumOperands() - 1) report("G_BUILD_VECTOR must have an operand for each elemement", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_BUILD_VECTOR source operand types are not homogeneous", MI); - } break; } @@ -1292,12 +1290,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (!DstTy.isVector() || SrcEltTy.isVector()) report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous", MI); - } if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits()) report("G_BUILD_VECTOR_TRUNC source operand types are not larger than " "dest elt type", @@ -1316,11 +1312,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (MI->getNumOperands() < 3) report("G_CONCAT_VECTOR requires at least 2 source operands", MI); - for (unsigned i = 2; i < MI->getNumOperands(); ++i) { - if (MRI->getType(MI->getOperand(1).getReg()) != - MRI->getType(MI->getOperand(i).getReg())) + for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2)) + if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg())) report("G_CONCAT_VECTOR source operand types are not homogeneous", MI); - } if (DstTy.getNumElements() != SrcTy.getNumElements() * (MI->getNumOperands() - 1)) report("G_CONCAT_VECTOR num dest and source elements should match", MI); @@ -3063,9 +3057,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred); // Predecessor of landing pad live-out on last call. 
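Editor's sketch, not from the commit: the three verifier hunks above share one shape -- every source operand (index 2 onward) must have the same LLT as the first source (index 1). Reduced to strings:

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    static bool sourcesHomogeneous(const std::vector<std::string> &OpTypes) {
      // OpTypes[0] is the destination; sources start at index 1.
      for (std::size_t I = 2; I < OpTypes.size(); ++I)
        if (OpTypes[I] != OpTypes[1])
          return false;
      return true;
    }

    int main() {
      std::printf("%d\n", sourcesHomogeneous(
          {"<4 x s32>", "s32", "s32", "s32", "s32"})); // 1: all match
      std::printf("%d\n", sourcesHomogeneous(
          {"<4 x s32>", "s32", "s64", "s32", "s32"})); // 0: s64 breaks it
    }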
if (MFI->isEHPad()) { - for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) { - if (I->isCall()) { - PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex(); + for (const MachineInstr &MI : llvm::reverse(*Pred)) { + if (MI.isCall()) { + PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex(); break; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index 8b3cdfab4d42..aaa6403cc978 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -73,8 +73,7 @@ void ModuloScheduleExpander::expand() { // stage difference for each use. Keep the maximum value. for (MachineInstr *MI : Schedule.getInstructions()) { int DefStage = Schedule.getStage(MI); - for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { - MachineOperand &Op = MI->getOperand(i); + for (const MachineOperand &Op : MI->operands()) { if (!Op.isReg() || !Op.isDef()) continue; @@ -1006,8 +1005,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, unsigned CurStageNum, unsigned InstrStageNum, ValueMapTy *VRMap) { - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); + for (MachineOperand &MO : NewMI->operands()) { if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) continue; Register reg = MO.getReg(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 9a4f70a6070f..29a88480fd9f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -527,9 +527,9 @@ static void updateLiveness(MachineFunction &MF) { const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (const CalleeSavedInfo &I : CSI) { for (MachineBasicBlock *MBB : Visited) { - MCPhysReg Reg = CSI[i].getReg(); + MCPhysReg Reg = I.getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) @@ -540,17 +540,16 @@ static void updateLiveness(MachineFunction &MF) { // each MBB between the prologue and epilogue so that it is not clobbered // before it is reloaded in the epilogue. The Visited set contains all // blocks outside of the region delimited by prologue/epilogue. - if (CSI[i].isSpilledToReg()) { + if (I.isSpilledToReg()) { for (MachineBasicBlock &MBB : MF) { if (Visited.count(&MBB)) continue; - MCPhysReg DstReg = CSI[i].getDstReg(); + MCPhysReg DstReg = I.getDstReg(); if (!MBB.isLiveIn(DstReg)) MBB.addLiveIn(DstReg); } } } - } /// Insert restore code for the callee-saved registers used in the function. @@ -902,9 +901,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. 
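Aside (illustrative, not from the diff): the updateLiveness loop above marks each callee-saved register live-in to every block outside the prologue/epilogue region, skipping reserved registers; set insertion deduplicates the same way addLiveIn's guard does. A toy model with invented block and register numbers:

    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    int main() {
      std::set<int> Visited = {0, 2};       // blocks outside the region
      std::set<int> Reserved = {42};        // reserved physregs
      std::vector<int> CSI = {5, 42, 7};    // callee-saved registers
      std::map<int, std::set<int>> LiveIns; // block -> live-in regs
      for (int Reg : CSI)
        for (int BB : Visited)
          if (!Reserved.count(Reg))
            LiveIns[BB].insert(Reg);        // insert() dedups for us
      std::printf("%zu %zu\n", LiveIns[0].size(), LiveIns[2].size()); // 2 2
    }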
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() && - RegInfo->useFPForScavengingIndex(MF) && - !RegInfo->hasStackRealignment(MF)); + bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF); if (RS && EarlyScavengingSlots) { SmallVector<int, 2> SFIs; RS->getScavengingFrameIndices(SFIs); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 68920e2e50df..6653145d3d2a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -1258,8 +1258,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of <def,read-undef>). - for (unsigned I = MI.getNumOperands(); I-- > 0;) { - MachineOperand &MO = MI.getOperand(I); + for (MachineOperand &MO : llvm::reverse(MI.operands())) { if (!MO.isReg() || !MO.isDef()) continue; @@ -1362,8 +1361,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free early clobbers. if (HasEarlyClobber) { - for (unsigned I = MI.getNumOperands(); I-- > 0; ) { - MachineOperand &MO = MI.getOperand(I); + for (MachineOperand &MO : llvm::reverse(MI.operands())) { if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; // subreg defs don't free the full register. We left the subreg number @@ -1440,8 +1438,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) { MachineBasicBlock::instr_iterator BundledMI = MI.getIterator(); ++BundledMI; while (BundledMI->isBundledWithPred()) { - for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) { - MachineOperand &MO = BundledMI->getOperand(I); + for (MachineOperand &MO : BundledMI->operands()) { if (!MO.isReg()) continue; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 5a93b58e0baf..50411c177007 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -199,7 +199,8 @@ class RAGreedy : public MachineFunctionPass, struct RegInfo { LiveRangeStage Stage = RS_New; - // Cascade - Eviction loop prevention. See canEvictInterference(). + // Cascade - Eviction loop prevention. See + // canEvictInterferenceBasedOnCost(). unsigned Cascade = 0; RegInfo() = default; @@ -207,13 +208,51 @@ class RAGreedy : public MachineFunctionPass, IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; + LiveRangeStage getStage(Register Reg) const { + return ExtraRegInfo[Reg].Stage; + } + LiveRangeStage getStage(const LiveInterval &VirtReg) const { - return ExtraRegInfo[VirtReg.reg()].Stage; + return getStage(VirtReg.reg()); + } + + void setStage(Register Reg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[Reg].Stage = Stage; } void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + setStage(VirtReg.reg(), Stage); + } + + /// Return the current stage of the register, if present, otherwise initialize + /// it and return that. 
+ LiveRangeStage getOrInitStage(Register Reg) { + ExtraRegInfo.grow(Reg); + return getStage(Reg); + } + + unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; } + + void setCascade(Register Reg, unsigned Cascade) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); - ExtraRegInfo[VirtReg.reg()].Stage = Stage; + ExtraRegInfo[Reg].Cascade = Cascade; + } + + unsigned getOrAssignNewCascade(Register Reg) { + unsigned Cascade = getCascade(Reg); + if (!Cascade) { + Cascade = NextCascade++; + setCascade(Reg, Cascade); + } + return Cascade; + } + + unsigned getCascadeOrCurrentNext(Register Reg) const { + unsigned Cascade = getCascade(Reg); + if (!Cascade) + Cascade = NextCascade; + return Cascade; } template<typename Iterator> @@ -410,8 +449,11 @@ private: void calcGapWeights(MCRegister, SmallVectorImpl<float> &); Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; - bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, - const SmallVirtRegSet &) const; + bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, + EvictionCost &, + const SmallVirtRegSet &) const; + bool canEvictHintInterference(LiveInterval &, MCRegister, + const SmallVirtRegSet &) const; bool canEvictInterferenceInRange(const LiveInterval &VirtReg, MCRegister PhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost) const; @@ -683,15 +725,16 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { assert(Reg.isVirtual() && "Can only enqueue virtual registers"); unsigned Prio; - ExtraRegInfo.grow(Reg); - if (ExtraRegInfo[Reg].Stage == RS_New) - ExtraRegInfo[Reg].Stage = RS_Assign; - - if (ExtraRegInfo[Reg].Stage == RS_Split) { + auto Stage = getOrInitStage(Reg); + if (Stage == RS_New) { + Stage = RS_Assign; + setStage(Reg, Stage); + } + if (Stage == RS_Split) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Prio = Size; - } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + } else if (Stage == RS_Memory) { // Memory operand should be considered last. // Change the priority such that Memory operand are assigned in // the reverse order that they came in. @@ -706,7 +749,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); - if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && + if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { // Allocate original local ranges in linear instruction order. Since they // are singly defined, this produces optimal coloring in the absence of @@ -780,10 +823,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, if (Order.isHint(Hint)) { MCRegister PhysHint = Hint.asMCReg(); LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n'); - EvictionCost MaxCost; - MaxCost.setBrokenHints(1); - if (canEvictInterference(VirtReg, PhysHint, true, MaxCost, - FixedRegisters)) { + + if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) { evictInterference(VirtReg, PhysHint, NewVRegs); return PhysHint; } @@ -864,8 +905,19 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, return false; } -/// canEvictInterference - Return true if all interferences between VirtReg and -/// PhysReg can be evicted. 
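Editorial aside, not part of the commit: the new cascade accessors above wrap one convention -- cascade 0 means "not assigned yet", and getOrAssignNewCascade hands out the next number lazily the first time a virtual register evicts something. In miniature:

    #include <cstdio>
    #include <vector>

    struct CascadeMap {
      std::vector<unsigned> Cascade; // indexed by virtual register; 0 = none
      unsigned NextCascade = 1;

      unsigned getOrAssignNew(unsigned Reg) {
        if (!Cascade[Reg])
          Cascade[Reg] = NextCascade++; // first eviction by this vreg
        return Cascade[Reg];
      }
    };

    int main() {
      CascadeMap M;
      M.Cascade.resize(4, 0);
      unsigned A = M.getOrAssignNew(2); // assigns cascade 1
      unsigned B = M.getOrAssignNew(2); // reuses it
      unsigned C = M.getOrAssignNew(3); // assigns cascade 2
      std::printf("%u %u %u\n", A, B, C); // 1 1 2
    }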
+/// canEvictHintInterference - return true if the interference for VirtReg +/// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. +bool RAGreedy::canEvictHintInterference( + LiveInterval &VirtReg, MCRegister PhysReg, + const SmallVirtRegSet &FixedRegisters) const { + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); + return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost, + FixedRegisters); +} + +/// canEvictInterferenceBasedOnCost - Return true if all interferences between +/// VirtReg and PhysReg can be evicted. /// /// @param VirtReg Live range that is about to be assigned. /// @param PhysReg Desired register for assignment. @@ -873,7 +925,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference( +bool RAGreedy::canEvictInterferenceBasedOnCost( LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. @@ -1054,9 +1106,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges than then only be // evicted by a newer cascade, preventing infinite loops. - unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade; - if (!Cascade) - Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++; + unsigned Cascade = getOrAssignNewCascade(VirtReg.reg()); LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI) << " interference: Cascade " << Cascade << '\n'); @@ -1082,10 +1132,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); Matrix->unassign(*Intf); - assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade || + assert((getCascade(Intf->reg()) < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && "Cannot decrease cascade number, illegal eviction"); - ExtraRegInfo[Intf->reg()].Cascade = Cascade; + setCascade(Intf->reg(), Cascade); ++NumEvicted; NewVRegs.push_back(Intf->reg()); } @@ -1150,8 +1200,8 @@ MCRegister RAGreedy::tryFindEvictionCandidate( continue; } - if (!canEvictInterference(VirtReg, PhysReg, false, BestCost, - FixedRegisters)) + if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost, + FixedRegisters)) continue; // Best so far. @@ -1756,7 +1806,6 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, SE->finish(&IntvMap); DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); // Sort out the new intervals created by splitting. We get four kinds: @@ -1765,10 +1814,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { - LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); + const LiveInterval &Reg = LIS->getInterval(LREdit.get(I)); // Ignore old intervals from DCE. - if (getStage(Reg) != RS_New) + if (getOrInitStage(Reg.reg()) != RS_New) continue; // Remainder interval. 
Don't try splitting again, spill if it doesn't @@ -2012,13 +2061,11 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Tell LiveDebugVariables about the new ranges. DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned I = 0, E = LREdit.size(); I != E; ++I) { - LiveInterval &LI = LIS->getInterval(LREdit.get(I)); - if (getStage(LI) == RS_New && IntvMap[I] == 0) + const LiveInterval &LI = LIS->getInterval(LREdit.get(I)); + if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0) setStage(LI, RS_Spill); } @@ -2104,8 +2151,6 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); - // Assign all new registers to RS_Spill. This was the last chance. setStage(LREdit.begin(), LREdit.end(), RS_Spill); return 0; @@ -2400,7 +2445,6 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector<unsigned, 8> IntvMap; SE->finish(&IntvMap); DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS); - // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, // leave the new intervals as RS_New so they can compete. @@ -3021,7 +3065,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, LiveRangeStage Stage = getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " - << ExtraRegInfo[VirtReg.reg()].Cascade << '\n'); + << getCascade(VirtReg.reg()) << '\n'); // Try to evict a less worthy live range, but only for ranges from the primary // queue. The RS_Split ranges already failed to do this, and they should not @@ -3311,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI)); ExtraRegInfo.clear(); - ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index c847068bca90..4c8534cf2d01 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -3908,20 +3908,20 @@ void RegisterCoalescer::lateLiveIntervalUpdate() { bool RegisterCoalescer:: copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { bool Progress = false; - for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { - if (!CurrList[i]) + for (MachineInstr *&MI : CurrList) { + if (!MI) continue; // Skip instruction pointers that have already been erased, for example by // dead code elimination. 
- if (ErasedInstrs.count(CurrList[i])) { - CurrList[i] = nullptr; + if (ErasedInstrs.count(MI)) { + MI = nullptr; continue; } bool Again = false; - bool Success = joinCopy(CurrList[i], Again); + bool Success = joinCopy(MI, Again); Progress |= Success; if (Success || !Again) - CurrList[i] = nullptr; + MI = nullptr; } return Progress; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 3f013eb6024e..0e8e8338b46d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -406,11 +406,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // register in later operands. The lanes of other defs will now be live // after this instruction, so these should not be treated as killed by the // instruction even though they appear to be killed in this one operand. - for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) { - const MachineOperand &OtherMO = MI->getOperand(I); + for (const MachineOperand &OtherMO : + llvm::drop_begin(MI->operands(), OperIdx + 1)) if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) KillLaneMask &= ~getLaneMaskForMO(OtherMO); - } } // Clear undef flag, we'll re-add it later once we know which subregister diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ce400ea43f29..df5a041b87cd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4436,7 +4436,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); - if (OptimizedDiv.getNode()) { + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { // If the equivalent Div node also exists, update its users. unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), @@ -4464,6 +4464,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDLoc DL(N); if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // fold (mulhs x, 0) -> 0 // do not return N0/N1, because undef node may exist. if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || @@ -4521,6 +4524,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDLoc DL(N); if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + // fold (mulhu x, 0) -> 0 // do not return N0/N1, because undef node may exist. if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) || @@ -4779,6 +4785,106 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { return SDValue(); } +// Function to calculate whether the Min/Max pair of SDNodes (potentially +// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1) +// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp +// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The +// operands are the same as SimplifySelectCC. N0<N1 ? 
N2 : N3 +static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, unsigned &BW) { + auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3, + ISD::CondCode CC) { + // The compare and select operand should be the same or the select operands + // should be truncated versions of the comparison. + if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) + return 0; + // The constants need to be the same or a truncated version of each other. + ConstantSDNode *N1C = isConstOrConstSplat(N1); + ConstantSDNode *N3C = isConstOrConstSplat(N3); + if (!N1C || !N3C) + return 0; + const APInt &C1 = N1C->getAPIntValue(); + const APInt &C2 = N3C->getAPIntValue(); + if (C1.getBitWidth() < C2.getBitWidth() || + C1 != C2.sextOrSelf(C1.getBitWidth())) + return 0; + return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); + }; + + // Check the initial value is a SMIN/SMAX equivalent. + unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC); + if (!Opcode0) + return SDValue(); + + SDValue N00, N01, N02, N03; + ISD::CondCode N0CC; + switch (N0.getOpcode()) { + case ISD::SMIN: + case ISD::SMAX: + N00 = N02 = N0.getOperand(0); + N01 = N03 = N0.getOperand(1); + N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT; + break; + case ISD::SELECT_CC: + N00 = N0.getOperand(0); + N01 = N0.getOperand(1); + N02 = N0.getOperand(2); + N03 = N0.getOperand(3); + N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get(); + break; + case ISD::SELECT: + case ISD::VSELECT: + if (N0.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + N00 = N0.getOperand(0).getOperand(0); + N01 = N0.getOperand(0).getOperand(1); + N02 = N0.getOperand(1); + N03 = N0.getOperand(2); + N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get(); + break; + default: + return SDValue(); + } + + unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC); + if (!Opcode1 || Opcode0 == Opcode1) + return SDValue(); + + ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01); + ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? 
N01 : N1); + if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0)) + return SDValue(); + + const APInt &MinC = MinCOp->getAPIntValue(); + const APInt &MaxC = MaxCOp->getAPIntValue(); + APInt MinCPlus1 = MinC + 1; + if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2()) + return SDValue(); + BW = MinCPlus1.exactLogBase2() + 1; + return N02; +} + +static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + SelectionDAG &DAG) { + unsigned BW; + SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW); + if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT) + return SDValue(); + EVT FPVT = Fp.getOperand(0).getValueType(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW); + if (FPVT.isVector()) + NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT, + FPVT.getVectorElementCount()); + if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat( + ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT)) + return SDValue(); + SDLoc DL(Fp); + SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0), + DAG.getValueType(NewVT.getScalarType())); + return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0)); +} + SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4817,6 +4923,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.getNode(AltOpcode, DL, VT, N0, N1); } + if (Opcode == ISD::SMIN || Opcode == ISD::SMAX) + if (SDValue S = PerformMinMaxFpToSatCombine( + N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG)) + return S; + // Simplify the operands using demanded-bits information. if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -9940,9 +10051,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, compressing, or truncating stores? - if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && - MST->isUnindexed() && !MST->isCompressingStore() && - !MST->isTruncatingStore()) + if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() && + !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getMemOperand()); @@ -9997,9 +10107,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // If this is a masked load with an all ones mask, we can use a unmasked load. // FIXME: Can we do this for indexed, expanding, or extending loads? - if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && - MLD->isUnindexed() && !MLD->isExpandingLoad() && - MLD->getExtensionType() == ISD::NON_EXTLOAD) { + if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() && + !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) { SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getMemOperand()); return CombineTo(N, NewLd, NewLd.getValue(1)); @@ -10138,6 +10247,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return FMinMax; } + if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG)) + return S; + // If this select has a condition (setcc) with narrower operands than the // select, try to widen the compare to match the select width. // TODO: This should be extended to handle any constant. 
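The hunks above introduce isSaturatingMinMax/PerformMinMaxFpToSatCombine and hook them into visitIMINMAX and visitVSELECT; a third call site in SimplifySelectCC follows below. All three funnel into the same matcher: a signed min/max clamp wrapped around an FP_TO_SINT is rewritten to FP_TO_SINT_SAT when the clamp bounds are 2^(BW-1)-1 and -2^(BW-1). As a minimal standalone sketch of the scalar computation being recognized (plain C++, not part of the patch; the helper name and the BW = 8 bounds are invented for illustration):

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  // Hypothetical scalar model of the DAG pattern: FP_TO_SINT followed by a
  // signed clamp. With MinC = 127 and MaxC = -128, MinC + 1 = 128 = 2^7 is a
  // power of two and -MaxC == MinC + 1, so isSaturatingMinMax reports BW = 8.
  static int32_t clampToI8(float F) {
    int32_t V = static_cast<int32_t>(F);   // ISD::FP_TO_SINT (F kept in i32 range)
    V = std::min<int32_t>(V, 127);         // ISD::SMIN, C = 2^(BW-1) - 1
    V = std::max<int32_t>(V, -128);        // ISD::SMAX, C = -2^(BW-1)
    return V;                              // behaves like a saturating fptosi to i8
  }

  int main() {
    for (float F : {-1000.0f, -12.5f, 99.0f, 300.0f})
      std::printf("%7.1f -> %4d\n", F, clampToI8(F));
    return 0;
  }

On targets whose shouldConvertFpToSat hook accepts the conversion, the whole chain collapses into one FP_TO_SINT_SAT node of the clamped width, sign-extended or truncated back to the original result type.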
@@ -15007,7 +15119,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -23034,6 +23146,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); } + if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) + return S; + return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c1bb65409282..331e0325aea3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -765,7 +765,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, assert(!SD->isVariadic()); SDDbgOperand DbgOperand = SD->getLocationOps()[0]; MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = (DIExpression*)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); @@ -775,6 +775,13 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, DbgOperand.getKind() == SDDbgOperand::CONST) return EmitDbgValueFromSingleOp(SD, VRBaseMap); + // Immediately fold any indirectness from the LLVM-IR intrinsic into the + // expression: + if (SD->isIndirect()) { + std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; + Expr = DIExpression::append(Expr, Elts); + } + // It may not be immediately possible to identify the MachineInstr that // defines a VReg, it can depend for example on the order blocks are // emitted in. When this happens, or when further analysis is needed later, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index eb9d2286aeb4..08598eeded7a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3553,9 +3553,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Node. Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - if (Tmp2.getOpcode() == ISD::SETCC) { - Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, - Tmp1, Tmp2.getOperand(2), + if (Tmp2.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, + Tmp2.getOperand(0).getValueType())) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2), Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 1f73c9eea104..98312f91d8c0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -28,7 +28,7 @@ using namespace llvm; static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); -/// Do extensive, expensive, sanity checking. +/// Do extensive, expensive, basic correctness checking. 
void DAGTypeLegalizer::PerformExpensiveChecks() { // If a node is not processed, then none of its values should be mapped by any // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. @@ -534,7 +534,8 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can // in theory momentarily not be the case while ReplaceValueWith is doing - // its stuff. Mark the original node NewNode to help sanity checking. + // its stuff. Mark the original node NewNode to help basic correctness + // checking. N->setNodeId(NewNode); if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) // It morphed into a previously analyzed node - nothing more to do. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 539c9cb9c256..7ec2638b1e71 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment, - MLD->getAAInfo(), MLD->getRanges()); + MLD->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(), + MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Generate hi masked load. 
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) @@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, LoMemVT.getStoreSize().getFixedSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(), - MLD->getRanges()); + MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, MMO, MLD->getAddressingMode(), ExtType, @@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); SDValue Lo, Hi, Res; - unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize()); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment, - N->getAAInfo(), N->getRanges()); + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), @@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MPI = N->getPointerInfo().getWithOffset( LoMemVT.getStoreSize().getFixedSize()); - unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize()); MMO = DAG.getMachineFunction().getMachineMemOperand( - MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(), - N->getRanges()); + MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 55fe26eb64cd..2695ed36991c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -268,8 +268,8 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { // Now see if there are no other dependencies // to instructions already in the packet. - for (unsigned i = 0, e = Packet.size(); i != e; ++i) - for (const SDep &Succ : Packet[i]->Succs) { + for (const SUnit *S : Packet) + for (const SDep &Succ : S->Succs) { // Since we do not add pseudos to packets, might as well // ignore order deps. 
if (Succ.isCtrl()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 95f7e43b151d..84e6d2a16422 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -706,8 +706,8 @@ void ScheduleDAGSDNodes::dump() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ScheduleDAGSDNodes::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) + for (const SUnit *SU : Sequence) { + if (SU) dumpNode(*SU); else dbgs() << "**** NOOP ****\n"; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 008665d50233..c282e03387dd 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -406,8 +406,8 @@ bool ISD::isVPOpcode(unsigned Opcode) { switch (Opcode) { default: return false; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \ + case ISD::VPSD: \ return true; #include "llvm/IR/VPIntrinsics.def" } @@ -416,23 +416,25 @@ bool ISD::isVPOpcode(unsigned Opcode) { bool ISD::isVPBinaryOp(unsigned Opcode) { switch (Opcode) { default: - return false; -#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ - case ISD::SDOPC: \ - return true; + break; +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD: +#define VP_PROPERTY_BINARYOP return true; +#define END_REGISTER_VP_SDNODE(VPSD) break; #include "llvm/IR/VPIntrinsics.def" } + return false; } bool ISD::isVPReduction(unsigned Opcode) { switch (Opcode) { default: - return false; -#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ - case ISD::SDOPC: \ - return true; + break; +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD: +#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true; +#define END_REGISTER_VP_SDNODE(VPSD) break; #include "llvm/IR/VPIntrinsics.def" } + return false; } /// The operand position of the vector mask. @@ -440,8 +442,8 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { default: return None; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ + case ISD::VPSD: \ return MASKPOS; #include "llvm/IR/VPIntrinsics.def" } @@ -452,8 +454,8 @@ Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { switch (Opcode) { default: return None; -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ - case ISD::SDOPC: \ +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ + case ISD::VPSD: \ return EVLPOS; #include "llvm/IR/VPIntrinsics.def" } @@ -974,7 +976,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) { } #ifndef NDEBUG -/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +/// VerifySDNode - Check the given SDNode. Aborts if it is invalid. static void VerifySDNode(SDNode *N) { switch (N->getOpcode()) { default: @@ -4540,10 +4542,25 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { } // FIXME: unify with llvm::haveNoCommonBitsSet. 
-// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); + // Match masked merge pattern (X & ~M) op (Y & M) + if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) { + auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) { + if (isBitwiseNot(NotM, true)) { + SDValue NotOperand = NotM->getOperand(0); + return NotOperand == And->getOperand(0) || + NotOperand == And->getOperand(1); + } + return false; + }; + if (MatchNoCommonBitsPattern(A->getOperand(0), B) || + MatchNoCommonBitsPattern(A->getOperand(1), B) || + MatchNoCommonBitsPattern(B->getOperand(0), A) || + MatchNoCommonBitsPattern(B->getOperand(1), A)) + return true; + } return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), computeKnownBits(B)); } @@ -5070,7 +5087,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); break; case ISD::BITCAST: - // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && "Cannot BITCAST between types of different sizes!"); if (VT == Operand.getValueType()) return Operand; // noop conversion. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5d911c165293..7726a0007e44 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata()); + MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - MemoryLocation ML; - if (VT.isScalableVector()) - ML = MemoryLocation::getAfter(PtrOperand); - else - ML = MemoryLocation(PtrOperand, LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. 
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, @@ -5807,8 +5797,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::vscale: { match(&I, m_VScale(DAG.getDataLayout())); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, - DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1))); + setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1))); return; } case Intrinsic::vastart: visitVAStart(I); return; @@ -6942,10 +6931,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); - SDValue N = - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT); if (Intrinsic == Intrinsic::eh_exceptioncode) - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + N = DAG.getZExtOrTrunc(N, sdl, MVT::i32); setValue(&I, N); return; } @@ -6957,7 +6945,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Triple.getArch() != Triple::x86_64) return; - SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; // We want to say that we always want the arguments in registers. @@ -6974,7 +6961,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, - DL, NodeTys, Ops); + sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); @@ -6988,7 +6975,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Triple.getArch() != Triple::x86_64) return; - SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; // We want to say that we always want the arguments in registers. @@ -7009,7 +6995,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // see that some registers may be assumed clobbered and have to preserve // them across calls to the intrinsic. 
MachineSDNode *MN = DAG.getMachineNode( - TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops); SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); @@ -7047,7 +7033,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (!Base) report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); - Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0)); struct BranchFunnelTarget { int64_t Offset; @@ -7068,8 +7054,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, report_fatal_error( "llvm.icall.branch.funnel operand must be a GlobalValue"); Targets.push_back({Offset, DAG.getTargetGlobalAddress( - GA->getGlobal(), getCurSDLoc(), - Val.getValueType(), GA->getOffset())}); + GA->getGlobal(), sdl, Val.getValueType(), + GA->getOffset())}); } llvm::sort(Targets, [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { @@ -7077,13 +7063,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, }); for (auto &T : Targets) { - Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); + Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32)); Ops.push_back(T.Target); } Ops.push_back(DAG.getRoot()); // Chain - SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, - getCurSDLoc(), MVT::Other, Ops), + SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl, + MVT::Other, Ops), 0); DAG.setRoot(N); setValue(&I, N); @@ -7102,7 +7088,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; SDValue Val = TSI.EmitTargetCodeForSetTag( - DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), + DAG, sdl, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), ZeroMemory); DAG.setRoot(Val); @@ -7114,46 +7100,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Const = getValue(I.getOperand(1)); EVT PtrVT = Ptr.getValueType(); - setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr, - DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT))); + setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, + DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } case Intrinsic::get_active_lane_mask: { - auto DL = getCurSDLoc(); + EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); - SDValue TripCount = getValue(I.getOperand(1)); - Type *ElementTy = I.getOperand(0)->getType(); - EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned VecWidth = VT.getVectorNumElements(); + EVT ElementVT = Index.getValueType(); - SmallVector<SDValue, 16> OpsTripCount; - SmallVector<SDValue, 16> OpsIndex; - SmallVector<SDValue, 16> OpsStepConstants; - for (unsigned i = 0; i < VecWidth; i++) { - OpsTripCount.push_back(TripCount); - OpsIndex.push_back(Index); - OpsStepConstants.push_back( - DAG.getConstant(i, DL, EVT::getEVT(ElementTy))); + if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) { + visitTargetIntrinsic(I, Intrinsic); + return; } - EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth); + SDValue TripCount = getValue(I.getOperand(1)); + auto VecTy = CCVT.changeVectorElementType(ElementVT); - auto VecTy = 
EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth)); - SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); - SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); + SDValue VectorIndex, VectorTripCount; + if (VecTy.isScalableVector()) { + VectorIndex = DAG.getSplatVector(VecTy, sdl, Index); + VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount); + } else { + VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index); + VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount); + } + SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( - ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount); - SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), + ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); + SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0), VectorTripCount, ISD::CondCode::SETULT); - setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, - DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), + setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT, + DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT), SetCC)); return; } case Intrinsic::experimental_vector_insert: { - auto DL = getCurSDLoc(); - SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); @@ -7163,16 +7145,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), DL); + cast<ConstantSDNode>(Index)->getZExtValue(), sdl); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, + setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec, Index)); return; } case Intrinsic::experimental_vector_extract: { - auto DL = getCurSDLoc(); - SDValue Vec = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -7182,9 +7162,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); if (Index.getValueType() != VectorIdxTy) Index = DAG.getVectorIdxConstant( - cast<ConstantSDNode>(Index)->getZExtValue(), DL); + cast<ConstantSDNode>(Index)->getZExtValue(), sdl); - setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); + setValue(&I, + DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index)); return; } case Intrinsic::experimental_vector_reverse: @@ -7314,9 +7295,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { Optional<unsigned> ResOPC; switch (VPIntrin.getIntrinsicID()) { -#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN: -#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID; -#define END_REGISTER_VP_INTRINSIC(...) break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) 
ResOPC = ISD::VPSD; +#define END_REGISTER_VP_INTRINSIC(VPID) break; #include "llvm/IR/VPIntrinsics.def" } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e4a69adff05b..737695b5eabe 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( if (DemandedBits == 0 || DemandedElts == 0) return DAG.getUNDEF(Op.getValueType()); + bool IsLE = DAG.getDataLayout().isLittleEndian(); unsigned NumElts = DemandedElts.getBitWidth(); unsigned BitWidth = DemandedBits.getBitWidth(); KnownBits LHSKnown, RHSKnown; @@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedElts, DAG, Depth + 1)) return DAG.getBitcast(DstVT, V); - // TODO - bigendian once we have test coverage. - if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( } // TODO - bigendian once we have test coverage. - if ((NumSrcEltBits % NumDstEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); - if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && - DAG.getDataLayout().isLittleEndian() && + if (IsLE && DemandedElts == 1 && + DstVT.getSizeInBits() == SrcVT.getSizeInBits() && DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { return DAG.getBitcast(DstVT, Src); } @@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getValueType().isScalableVector()) return false; + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); assert((!Op.getValueType().isVector() || NumElts == Op.getValueType().getVectorNumElements()) && @@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::ROTR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + bool IsROTL = (Op.getOpcode() == ISD::ROTL); // If we're rotating an 0/-1 value, then it stays an 0/-1 value. 
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) return TLO.CombineTo(Op, Op0); + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + unsigned RevAmt = BitWidth - Amt; + + // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt)) + // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt) + APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + + // rot*(x, 0) --> x + if (Amt == 0) + return TLO.CombineTo(Op, Op0); + + // See if we don't demand either half of the rotated bits. + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) && + DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1)); + } + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) && + DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + } + } + // For pow-2 bitwidths we only demand the bottom modulo amt bits. if (isPowerOf2_32(BitWidth)) { APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); @@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only need the bottom element then we can just bitcast. // TODO: Handle ANY_EXTEND? - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); @@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. - // TODO - bigendian once we have test coverage. 
- if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, KnownSrcBits, TLO, Depth + 1)) return true; - } else if ((NumSrcEltBits % BitWidth) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) { + // TODO - bigendian once we have test coverage. unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); // Helper for demanding the specified elements and all the bits of both binary // operands. @@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Try calling SimplifyDemandedBits, converting demanded elts to the bits // of the large element. // TODO - bigendian once we have test coverage. - if (TLO.DAG.getDataLayout().isLittleEndian()) { + if (IsLE) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) @@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.zextOrTrunc(NumElts); KnownUndef = SrcUndef.zextOrTrunc(NumElts); - if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && + if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && Op.getValueSizeInBits() == Src.getValueSizeInBits() && - DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { + DemandedSrcElts == 1) { // aext - if we just need the bottom element then we can bitcast. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); } @@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( // zext - if we just need the bottom element then we can mask: // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. - if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && - Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND && + Op->isOnlyUserOf(Src.getNode()) && Op.getValueSizeInBits() == Src.getValueSizeInBits()) { SDLoc DL(Op); EVT SrcVT = Src.getValueType(); @@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. 
+ case ISD::ADD: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) { + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1, /*AssumeSingleUse*/ true)) + return true; + } + LLVM_FALLTHROUGH; + } case ISD::OR: case ISD::XOR: - case ISD::ADD: case ISD::SUB: case ISD::FADD: case ISD::FSUB: @@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) @@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp index 9aea5a7a8853..f49ba5ccd447 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -159,8 +159,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands. for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isFI()) continue; int FI = MO.getIndex(); @@ -394,8 +393,7 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF) { // Update the operands. - for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (MachineOperand &MO : MI.operands()) { if (!MO.isFI()) continue; int OldFI = MO.getIndex(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index 943bd18c6c8b..54fc6ee45d00 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); -static cl::opt<unsigned> TailDupJmpTableLoopSize( - "tail-dup-jmptable-loop-size", - cl::desc("Maximum loop latches to consider tail duplication that are " - "successors of loop header."), - cl::init(128), cl::Hidden); - static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; - // When doing tail-duplication with jumptable loops like: - // 1 -> 2 <-> 3 | - // \ <-> 4 | - // \ <-> 5 | - // \ <-> ... | - // \---> rest | - // quadratic number of edges and much more loops are added to CFG. 
This - // may cause compile time regression when jumptable is quiet large. - // So set the limit on jumptable cases. - auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { - const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(), - TailBB.pred_end()); - // Check the basic block has large number of successors, all of them only - // have one successor which is the basic block itself. - return llvm::count_if( - TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { - return Preds.count(SuccBB) && SuccBB->succ_size() == 1; - }) > TailDupJmpTableLoopSize; - }; - - if (isLargeJumpTableLoop(TailBB)) - return false; - // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index b0594ec086b2..fbf190a52585 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -136,6 +136,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew( return 0; } +bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const { + if (!hasFP(MF)) + return false; + + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + return RegInfo->useFPForScavengingIndex(MF) && + !RegInfo->hasStackRealignment(MF); +} + bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) { if (!F.hasLocalLinkage() || F.hasAddressTaken() || !F.hasFnAttribute(Attribute::NoRecurse)) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index e74b3195a130..5119dac36713 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -957,8 +957,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( // If any of the registers accessed are non-constant, conservatively assume // the instruction is not rematerializable. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) @@ -1401,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment( } TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} + +void TargetInstrInfo::mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const { + // Include target features from an arbitrary candidate for the outlined + // function. This makes sure the outlined function knows what kinds of + // instructions are going into it. This is fine, since all parent functions + // must necessarily support the instructions that are in the outlined region. + outliner::Candidate &FirstCand = Candidates.front(); + const Function &ParentFn = FirstCand.getMF()->getFunction(); + if (ParentFn.hasFnAttribute("target-features")) + F.addFnAttr(ParentFn.getFnAttribute("target-features")); + + // Set nounwind, so we don't generate eh_frame. 
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) { + return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind); + })) + F.addFnAttr(Attribute::NoUnwind); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 1d3bb286c882..d1c2cdeb133b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1082,7 +1082,7 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; - // Basic sanity checks. + // Basic correctness checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) @@ -2135,7 +2135,7 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) return nullptr; - // Basic sanity checks. + // Basic correctness checks. if (LHS->getType()->getPointerAddressSpace() != 0 || RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || RHS->isThreadLocal()) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 46cec5407565..dfd962be2882 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -373,19 +373,25 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) { return false; } -/// Given a register, if has a single in-basic block use, return the use -/// instruction if it's a copy or a two-address use. +/// Given a register, if all its uses are in the same basic block, return the +/// last use instruction if it's a copy or a two-address use. static MachineInstr * findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, MachineRegisterInfo *MRI, const TargetInstrInfo *TII, - bool &IsCopy, Register &DstReg, bool &IsDstPhys) { - if (!MRI->hasOneNonDBGUse(Reg)) - // None or more than one use. 
- return nullptr; - MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg); - MachineInstr &UseMI = *UseOp.getParent(); - if (UseMI.getParent() != MBB) + bool &IsCopy, Register &DstReg, bool &IsDstPhys, + LiveIntervals *LIS) { + MachineOperand *UseOp = nullptr; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *MI = MO.getParent(); + if (MI->getParent() != MBB) + return nullptr; + if (isPlainlyKilled(MI, Reg, LIS)) + UseOp = &MO; + } + if (!UseOp) return nullptr; + MachineInstr &UseMI = *UseOp->getParent(); + Register SrcReg; bool IsSrcPhys; if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { @@ -399,7 +405,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB, } if (UseMI.isCommutable()) { unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex; - unsigned Src2 = UseMI.getOperandNo(&UseOp); + unsigned Src2 = UseMI.getOperandNo(UseOp); if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) { MachineOperand &MO = UseMI.getOperand(Src1); if (MO.isReg() && MO.isUse() && @@ -492,8 +498,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) { return; } - for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) { removeMapRegEntry(MO, SrcRegMap, TRI); continue; @@ -685,7 +690,6 @@ bool TwoAddressInstructionPass::convertInstTo3Addr( // If the old instruction is debug value tracked, an update is required. if (auto OldInstrNum = mi->peekDebugInstrNum()) { - // Sanity check. assert(mi->getNumExplicitDefs() == 1); assert(NewMI->getNumExplicitDefs() == 1); @@ -724,7 +728,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) { Register NewReg; Register Reg = DstReg; while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, - NewReg, IsDstPhys)) { + NewReg, IsDstPhys, LIS)) { if (IsCopy && !Processed.insert(UseMI).second) break; @@ -1336,8 +1340,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. if (LV) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg().isVirtual()) { if (MO.isUse()) { if (MO.isKill()) { |
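One of the subtler changes above is the masked-merge case added to SelectionDAG::haveNoCommonBitsSet in SelectionDAG.cpp: (X & ~M) and (Y & M) select disjoint bit positions, so the two sides can never both have a bit set, and the merge is carry-free regardless of what is known about M. A tiny standalone check (plain C++; the constants are arbitrary examples, not taken from the patch):

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t X = 0xDEADBEEF, Y = 0x12345678, M = 0x0F0F0F0F;
    uint32_t A = X & ~M;                  // bits of X where M is clear
    uint32_t B = Y & M;                   // bits of Y where M is set
    assert((A & B) == 0);                 // no common bits, by construction
    assert((A | B) == (A ^ B));           // so the merge is carry-free...
    assert((A | B) == A + B);             // ...and OR, XOR and ADD coincide
    std::printf("masked merge: 0x%08X\n", (unsigned)(A | B));
    return 0;
  }

Matching this shape structurally lets haveNoCommonBitsSet succeed even in cases where computeKnownBits cannot prove anything useful about M.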
