Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen')
251 files changed, 10583 insertions, 5136 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp index cdf5586766da..f5dbaccfcad5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/Analysis.cpp @@ -21,12 +21,9 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp index 03e63321e3c4..1940f46232d3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp @@ -38,8 +38,19 @@ void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA, // unsigned long personality; /* Pointer to the personality routine */ // } - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getCompactUnwindSection()); + auto *EHInfo = + cast<MCSectionXCOFF>(Asm->getObjFileLowering().getCompactUnwindSection()); + if (Asm->TM.getFunctionSections()) { + // If option -ffunction-sections is on, append the function name to the + // name of EH Info Table csect so that each function has its own EH Info + // Table csect. This helps the linker to garbage-collect EH info of unused + // functions. + SmallString<128> NameStr = EHInfo->getName(); + raw_svector_ostream(NameStr) << '.' << Asm->MF->getFunction().getName(); + EHInfo = Asm->OutContext.getXCOFFSection(NameStr, EHInfo->getKind(), + EHInfo->getCsectProp()); + } + Asm->OutStreamer->switchSection(EHInfo); MCSymbol *EHInfoLabel = TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(Asm->MF); Asm->OutStreamer->emitLabel(EHInfoLabel); @@ -74,8 +85,8 @@ void AIXException::endFunction(const MachineFunction *MF) { const Function &F = MF->getFunction(); assert(F.hasPersonalityFn() && "Landingpads are presented, but no personality routine is found."); - const GlobalValue *Per = - dyn_cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); + const auto *Per = + cast<GlobalValue>(F.getPersonalityFn()->stripPointerCasts()); const MCSymbol *PerSym = Asm->TM.getSymbol(Per); emitExceptionInfoTable(LSDALabel, PerSym); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 223840c21d8b..e04a29fbb42b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -14,21 +14,14 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} -ARMException::~ARMException() {} +ARMException::~ARMException() = default; ARMTargetStreamer &ARMException::getTargetStreamer() { MCTargetStreamer &TS = 
*Asm->OutStreamer->getTargetStreamer(); @@ -101,7 +94,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = TypeInfos.size(); } @@ -116,7 +109,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding, // Emit the Exception Specifications. if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 65c45f73e965..b10d79f4b5a6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -18,7 +18,6 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" @@ -563,7 +562,7 @@ void llvm::emitDWARF5AccelTable( if (CompUnits.empty()) return; - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfDebugNamesSection()); Contents.finalize(Asm, "names"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp index 21da9d50efba..32d8dc793510 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -17,7 +17,7 @@ using namespace llvm; unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { - HasBeenUsed = true; + resetUsedFlag(true); auto IterBool = Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS))); return IterBool.first->second.Number; @@ -44,7 +44,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { return; // Start the dwarf addr section. 
- Asm.OutStreamer->SwitchSection(AddrSection); + Asm.OutStreamer->switchSection(AddrSection); MCSymbol *EndLabel = nullptr; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3e8e190eecc3..4a31bf85446b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" @@ -48,7 +49,6 @@ #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" @@ -82,33 +82,26 @@ #include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" -#include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" -#include "llvm/Remarks/Remark.h" -#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStreamer.h" -#include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -125,7 +118,6 @@ #include <cinttypes> #include <cstdint> #include <iterator> -#include <limits> #include <memory> #include <string> #include <utility> @@ -135,11 +127,6 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -// FIXME: this option currently only applies to DWARF, and not CodeView, tables -static cl::opt<bool> - DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, - cl::desc("Disable debug info printing")); - const char DWARFGroupName[] = "dwarf"; const char DWARFGroupDescription[] = "DWARF Emission"; const char DbgTimerName[] = "emit"; @@ -167,6 +154,178 @@ static gcp_map_type &getGCMap(void *&P) { return *(gcp_map_type*)P; } +namespace { +class AddrLabelMapCallbackPtr final : CallbackVH { + AddrLabelMap *Map = nullptr; + +public: + AddrLabelMapCallbackPtr() = default; + AddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} + + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + + void setMap(AddrLabelMap *map) { Map = map; } + + void deleted() override; + void allUsesReplacedWith(Value *V2) override; +}; +} // namespace + +class llvm::AddrLabelMap { + MCContext &Context; + struct AddrLabelSymEntry { + /// The symbols for the label. + TinyPtrVector<MCSymbol *> Symbols; + + Function *Fn; // The containing function of the BasicBlock. 
+ unsigned Index; // The index in BBCallbacks for the BasicBlock. + }; + + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; + + /// Callbacks for the BasicBlock's that we have entries for. We use this so + /// we get notified if a block is deleted or RAUWd. + std::vector<AddrLabelMapCallbackPtr> BBCallbacks; + + /// This is a per-function list of symbols whose corresponding BasicBlock got + /// deleted. These symbols need to be emitted at some point in the file, so + /// AsmPrinter emits them after the function body. + DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>> + DeletedAddrLabelsNeedingEmission; + +public: + AddrLabelMap(MCContext &context) : Context(context) {} + + ~AddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + } + + ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB); + + void takeDeletedSymbolsForFunction(Function *F, + std::vector<MCSymbol *> &Result); + + void UpdateForDeletedBlock(BasicBlock *BB); + void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); +}; + +ArrayRef<MCSymbol *> AddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + // If we already had an entry for this block, just return it. + if (!Entry.Symbols.empty()) { + assert(BB->getParent() == Entry.Fn && "Parent changed"); + return Entry.Symbols; + } + + // Otherwise, this is a new entry, create a new symbol for it and add an + // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. + BBCallbacks.emplace_back(BB); + BBCallbacks.back().setMap(this); + Entry.Index = BBCallbacks.size() - 1; + Entry.Fn = BB->getParent(); + MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol() + : Context.createTempSymbol(); + Entry.Symbols.push_back(Sym); + return Entry.Symbols; +} + +/// If we have any deleted symbols for F, return them. +void AddrLabelMap::takeDeletedSymbolsForFunction( + Function *F, std::vector<MCSymbol *> &Result) { + DenseMap<AssertingVH<Function>, std::vector<MCSymbol *>>::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. + if (I == DeletedAddrLabelsNeedingEmission.end()) + return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + +//===- Address of Block Management ----------------------------------------===// + +ArrayRef<MCSymbol *> +AsmPrinter::getAddrLabelSymbolToEmit(const BasicBlock *BB) { + // Lazily create AddrLabelSymbols. + if (!AddrLabelSymbols) + AddrLabelSymbols = std::make_unique<AddrLabelMap>(OutContext); + return AddrLabelSymbols->getAddrLabelSymbolToEmit( + const_cast<BasicBlock *>(BB)); +} + +void AsmPrinter::takeDeletedSymbolsForFunction( + const Function *F, std::vector<MCSymbol *> &Result) { + // If no blocks have had their addresses taken, we're done. + if (!AddrLabelSymbols) + return; + return AddrLabelSymbols->takeDeletedSymbolsForFunction( + const_cast<Function *>(F), Result); +} + +void AddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { + // If the block got deleted, there is no need for the symbol. If the symbol + // was already emitted, we can just forget about it, otherwise we need to + // queue it up for later emission when the function is output. 
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]); + AddrLabelSymbols.erase(BB); + assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + BBCallbacks[Entry.Index] = nullptr; // Clear the callback. + +#if !LLVM_MEMORY_SANITIZER_BUILD + // BasicBlock is destroyed already, so this access is UB detectable by msan. + assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && + "Block/parent mismatch"); +#endif + + for (MCSymbol *Sym : Entry.Symbols) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } +} + +void AddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { + // Get the entry for the RAUW'd block and remove it from our map. + AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]); + AddrLabelSymbols.erase(Old); + assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + + AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; + + // If New is not address taken, just move our symbol over to it. + if (NewEntry.Symbols.empty()) { + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. + NewEntry = std::move(OldEntry); // Set New's entry. + return; + } + + BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. + + // Otherwise, we need to add the old symbols to the new block's set. + llvm::append_range(NewEntry.Symbols, OldEntry.Symbols); +} + +void AddrLabelMapCallbackPtr::deleted() { + Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); +} + +void AddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { + Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); +} + /// getGVAlignment - Return the alignment to use for the specified global /// value. This rounds up to the preferred alignment if possible and legal. Align AsmPrinter::getGVAlignment(const GlobalObject *GV, const DataLayout &DL, @@ -271,6 +430,10 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { bool AsmPrinter::doInitialization(Module &M) { auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); MMI = MMIWP ? &MMIWP->getMMI() : nullptr; + HasSplitStack = false; + HasNoSplitStack = false; + + AddrLabelSymbols = nullptr; // Initialize TargetLoweringObjectFile. const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) @@ -281,9 +444,6 @@ bool AsmPrinter::doInitialization(Module &M) { OutStreamer->initSections(false, *TM.getMCSubtargetInfo()); - if (DisableDebugInfoPrinting) - MMI->setDebugInfoAvailability(false); - // Emit the version-min deployment target directive if needed. // // FIXME: If we end up with a collection of these sorts of Darwin-specific @@ -335,11 +495,11 @@ bool AsmPrinter::doInitialization(Module &M) { // Emit module-level inline asm if it exists. 
if (!M.getModuleInlineAsm().empty()) { OutStreamer->AddComment("Start of file scope inline assembly"); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } if (MAI->doesSupportDebugInformation()) { @@ -351,7 +511,7 @@ bool AsmPrinter::doInitialization(Module &M) { CodeViewLineTablesGroupDescription); } if (!EmitCodeView || M.getDwarfVersion()) { - if (!DisableDebugInfoPrinting) { + if (MMI->hasDebugInfo()) { DD = new DwarfDebug(this); Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, DbgTimerDescription, DWARFGroupName, @@ -536,9 +696,9 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (isVerbose()) { // When printing the control variable __emutls_v.*, // we don't need to print the original TLS variable name. - GV->printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, GV->getParent()); - OutStreamer->GetCommentOS() << '\n'; + GV->printAsOperand(OutStreamer->getCommentOS(), + /*PrintType=*/false, GV->getParent()); + OutStreamer->getCommentOS() << '\n'; } } @@ -652,7 +812,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer->emitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); } else if (GVKind.isThreadData()) { - OutStreamer->SwitchSection(TheSection); + OutStreamer->switchSection(TheSection); emitAlignment(Alignment, GV); OutStreamer->emitLabel(MangSym); @@ -661,12 +821,12 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { GV->getInitializer()); } - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); // Emit the variable struct for the runtime. MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - OutStreamer->SwitchSection(TLVSect); + OutStreamer->switchSection(TLVSect); // Emit the linkage here. 
emitLinkage(GV, GVSym); OutStreamer->emitLabel(GVSym); @@ -681,13 +841,13 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { OutStreamer->emitIntValue(0, PtrSize); OutStreamer->emitSymbolValue(MangSym, PtrSize); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); return; } MCSymbol *EmittedInitSym = GVSym; - OutStreamer->SwitchSection(TheSection); + OutStreamer->switchSection(TheSection); emitLinkage(GV, EmittedInitSym); emitAlignment(Alignment, GV); @@ -704,7 +864,7 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { OutStreamer->emitELFSize(EmittedInitSym, MCConstantExpr::create(Size, OutContext)); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } /// Emit the directive and value for debug thread local expression @@ -723,7 +883,7 @@ void AsmPrinter::emitFunctionHeader() { const Function &F = MF->getFunction(); if (isVerbose()) - OutStreamer->GetCommentOS() + OutStreamer->getCommentOS() << "-- Begin function " << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; @@ -737,7 +897,7 @@ void AsmPrinter::emitFunctionHeader() { MF->setSection(getObjFileLowering().getUniqueSectionForFunction(F, TM)); else MF->setSection(getObjFileLowering().SectionForGlobal(&F, TM)); - OutStreamer->SwitchSection(MF->getSection()); + OutStreamer->switchSection(MF->getSection()); if (!MAI->hasVisibilityOnlyWithLinkage()) emitVisibility(CurrentFnSym, F.getVisibility()); @@ -756,10 +916,10 @@ void AsmPrinter::emitFunctionHeader() { OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold); if (isVerbose()) { - F.printAsOperand(OutStreamer->GetCommentOS(), - /*PrintType=*/false, F.getParent()); + F.printAsOperand(OutStreamer->getCommentOS(), + /*PrintType=*/false, F.getParent()); emitFunctionHeaderComment(); - OutStreamer->GetCommentOS() << '\n'; + OutStreamer->getCommentOS() << '\n'; } // Emit the prefix data. @@ -817,7 +977,7 @@ void AsmPrinter::emitFunctionHeader() { // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. std::vector<MCSymbol*> DeadBlockSyms; - MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); + takeDeletedSymbolsForFunction(&F, DeadBlockSyms); for (MCSymbol *DeadBlockSym : DeadBlockSyms) { OutStreamer->AddComment("Address taken block that was later removed"); OutStreamer->emitLabel(DeadBlockSym); @@ -844,6 +1004,24 @@ void AsmPrinter::emitFunctionHeader() { // Emit the prologue data. if (F.hasPrologueData()) emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); + + // Emit the function prologue data for the indirect call sanitizer. + if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) { + assert(TM.getTargetTriple().getArch() == Triple::x86 || + TM.getTargetTriple().getArch() == Triple::x86_64); + assert(MD->getNumOperands() == 2); + + auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0)); + auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1)); + assert(PrologueSig && FTRTTIProxy); + emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig); + + const MCExpr *Proxy = lowerConstant(FTRTTIProxy); + const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext); + const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext); + // Use 32 bit since only small code model is supported. 
+ OutStreamer->emitValue(PCRel, 4u); + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -912,7 +1090,7 @@ void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { << printReg(RegNo, MF->getSubtarget().getRegisterInfo()); OutStreamer->AddComment(OS.str()); - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -925,7 +1103,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); } AP.OutStreamer->AddComment(OS.str()); - AP.OutStreamer->AddBlankLine(); + AP.OutStreamer->addBlankLine(); } /// emitDebugValueComment - This method handles the target-independent form @@ -1147,32 +1325,42 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { const MCSymbol *FunctionSymbol = getFunctionBegin(); - OutStreamer->PushSection(); - OutStreamer->SwitchSection(BBAddrMapSection); + OutStreamer->pushSection(); + OutStreamer->switchSection(BBAddrMapSection); + OutStreamer->AddComment("version"); + OutStreamer->emitInt8(OutStreamer->getContext().getBBAddrMapVersion()); + OutStreamer->AddComment("feature"); + OutStreamer->emitInt8(0); + OutStreamer->AddComment("function address"); OutStreamer->emitSymbolValue(FunctionSymbol, getPointerSize()); - // Emit the total number of basic blocks in this function. + OutStreamer->AddComment("number of basic blocks"); OutStreamer->emitULEB128IntValue(MF.size()); + const MCSymbol *PrevMBBEndSymbol = FunctionSymbol; // Emit BB Information for each basic block in the funciton. for (const MachineBasicBlock &MBB : MF) { const MCSymbol *MBBSymbol = MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol(); - // Emit the basic block offset. - emitLabelDifferenceAsULEB128(MBBSymbol, FunctionSymbol); + // Emit the basic block offset relative to the end of the previous block. + // This is zero unless the block is padded due to alignment. + emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol); // Emit the basic block size. When BBs have alignments, their size cannot // always be computed from their offsets. 
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol); OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + PrevMBBEndSymbol = MBB.getEndSymbol(); } - OutStreamer->PopSection(); + OutStreamer->popSection(); } void AsmPrinter::emitPseudoProbe(const MachineInstr &MI) { - auto GUID = MI.getOperand(0).getImm(); - auto Index = MI.getOperand(1).getImm(); - auto Type = MI.getOperand(2).getImm(); - auto Attr = MI.getOperand(3).getImm(); - DILocation *DebugLoc = MI.getDebugLoc(); - PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc); + if (PP) { + auto GUID = MI.getOperand(0).getImm(); + auto Index = MI.getOperand(1).getImm(); + auto Type = MI.getOperand(2).getImm(); + auto Attr = MI.getOperand(3).getImm(); + DILocation *DebugLoc = MI.getDebugLoc(); + PP->emitPseudoProbe(GUID, Index, Type, Attr, DebugLoc); + } } void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { @@ -1189,15 +1377,16 @@ void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { if (FrameInfo.hasVarSizedObjects()) return; - OutStreamer->PushSection(); - OutStreamer->SwitchSection(StackSizeSection); + OutStreamer->pushSection(); + OutStreamer->switchSection(StackSizeSection); const MCSymbol *FunctionSymbol = getFunctionBegin(); - uint64_t StackSize = FrameInfo.getStackSize(); + uint64_t StackSize = + FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize(); OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); OutStreamer->emitULEB128IntValue(StackSize); - OutStreamer->PopSection(); + OutStreamer->popSection(); } void AsmPrinter::emitStackUsage(const MachineFunction &MF) { @@ -1208,7 +1397,8 @@ void AsmPrinter::emitStackUsage(const MachineFunction &MF) { return; const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - uint64_t StackSize = FrameInfo.getStackSize(); + uint64_t StackSize = + FrameInfo.getStackSize() + FrameInfo.getUnsafeStackSize(); if (StackUsageStream == nullptr) { std::error_code EC; @@ -1298,7 +1488,7 @@ void AsmPrinter::emitFunctionBody() { } if (isVerbose()) - emitComments(MI, OutStreamer->GetCommentOS()); + emitComments(MI, OutStreamer->getCommentOS()); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1460,7 +1650,7 @@ void AsmPrinter::emitFunctionBody() { } // Switch to the original section in case basic block sections was used. - OutStreamer->SwitchSection(MF->getSection()); + OutStreamer->switchSection(MF->getSection()); const Function &F = MF->getFunction(); for (const auto &BB : F) { @@ -1527,9 +1717,9 @@ void AsmPrinter::emitFunctionBody() { emitPatchableFunctionEntries(); if (isVerbose()) - OutStreamer->GetCommentOS() << "-- End function\n"; + OutStreamer->getCommentOS() << "-- End function\n"; - OutStreamer->AddBlankLine(); + OutStreamer->addBlankLine(); } /// Compute the number of Global Variables that uses a Constant. @@ -1617,10 +1807,7 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { // Treat bitcasts of functions as functions also. This is important at least // on WebAssembly where object and function addresses can't alias each other. if (!IsFunction) - if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee())) - if (CE->getOpcode() == Instruction::BitCast) - IsFunction = - CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); + IsFunction = isa<Function>(GA.getAliasee()->stripPointerCasts()); // AIX's assembly directive `.set` is not usable for aliasing purpose, // so AIX has to use the extra-label-at-definition strategy. 
At this @@ -1650,13 +1837,13 @@ void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) { if (IsFunction) { OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction); if (TM.getTargetTriple().isOSBinFormatCOFF()) { - OutStreamer->BeginCOFFSymbolDef(Name); - OutStreamer->EmitCOFFSymbolStorageClass( + OutStreamer->beginCOFFSymbolDef(Name); + OutStreamer->emitCOFFSymbolStorageClass( GA.hasLocalLinkage() ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); - OutStreamer->EndCOFFSymbolDef(); + OutStreamer->endCOFFSymbolDef(); } } @@ -1734,7 +1921,7 @@ void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) { // Switch to the remarks section. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); - OutStreamer->SwitchSection(RemarksSection); + OutStreamer->switchSection(RemarksSection); OutStreamer->emitBinaryData(OS.str()); } @@ -1805,7 +1992,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Output stubs for external and common global variables. MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOF.getDataSection()); + OutStreamer->switchSection(TLOF.getDataSection()); const DataLayout &DL = M.getDataLayout(); emitAlignment(Align(DL.getPointerSize())); @@ -1829,7 +2016,7 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &Stub : Stubs) { SmallString<256> SectionName = StringRef(".rdata$"); SectionName += Stub.first->getName(); - OutStreamer->SwitchSection(OutContext.getCOFFSection( + OutStreamer->switchSection(OutContext.getCOFFSection( SectionName, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_LNK_COMDAT, @@ -1920,31 +2107,14 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit bytes for llvm.commandline metadata. emitModuleCommandLines(M); - // Emit __morestack address if needed for indirect calls. - if (MMI->usesMorestackAddr()) { - Align Alignment(1); - MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( - getDataLayout(), SectionKind::getReadOnly(), - /*C=*/nullptr, Alignment); - OutStreamer->SwitchSection(ReadOnlySection); - - MCSymbol *AddrSymbol = - OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); - OutStreamer->emitLabel(AddrSymbol); - - unsigned PtrSize = MAI->getCodePointerSize(); - OutStreamer->emitSymbolValue(GetExternalSymbolSymbol("__morestack"), - PtrSize); - } - // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if // split-stack is used. 
- if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) { - OutStreamer->SwitchSection( - OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0)); - if (MMI->hasNosplitStack()) - OutStreamer->SwitchSection( - OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); + if (TM.getTargetTriple().isOSBinFormatELF() && HasSplitStack) { + OutStreamer->switchSection(OutContext.getELFSection(".note.GNU-split-stack", + ELF::SHT_PROGBITS, 0)); + if (HasNoSplitStack) + OutStreamer->switchSection(OutContext.getELFSection( + ".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); } // If we don't have any trampolines, then we don't require stack memory @@ -1952,7 +2122,7 @@ bool AsmPrinter::doFinalization(Module &M) { Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) - OutStreamer->SwitchSection(S); + OutStreamer->switchSection(S); if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. @@ -1973,7 +2143,7 @@ bool AsmPrinter::doFinalization(Module &M) { GV.getVisibility() != GlobalValue::DefaultVisibility) continue; - OutStreamer->SwitchSection( + OutStreamer->switchSection( OutContext.getELFSection(".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", false, ++UniqueID, nullptr)); OutStreamer->emitBytes(GV.getPartition()); @@ -1989,8 +2159,9 @@ bool AsmPrinter::doFinalization(Module &M) { emitEndOfAsmFile(M); MMI = nullptr; + AddrLabelSymbols = nullptr; - OutStreamer->Finish(); + OutStreamer->finish(); OutStreamer->reset(); OwnedMLI.reset(); OwnedMDT.reset(); @@ -2009,6 +2180,16 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; const Function &F = MF.getFunction(); + // Record that there are split-stack functions, so we will emit a special + // section to tell the linker. + if (MF.shouldSplitStack()) { + HasSplitStack = true; + + if (!MF.getFrameInfo().needsSplitStackProlog()) + HasNoSplitStack = true; + } else + HasNoSplitStack = true; + // Get the function symbol. if (!MAI->needsFunctionDescriptors()) { CurrentFnSym = getSymbol(&MF.getFunction()); @@ -2113,7 +2294,7 @@ void AsmPrinter::emitConstantPool() { continue; if (CurSection != CPSections[i].S) { - OutStreamer->SwitchSection(CPSections[i].S); + OutStreamer->switchSection(CPSections[i].S); emitAlignment(Align(CPSections[i].Alignment)); CurSection = CPSections[i].S; Offset = 0; @@ -2156,7 +2337,7 @@ void AsmPrinter::emitJumpTableInfo() { if (JTInDiffSection) { // Drop it in the readonly section. MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); - OutStreamer->SwitchSection(ReadOnlySection); + OutStreamer->switchSection(ReadOnlySection); } emitAlignment(Align(MJTI->getEntryAlignment(DL))); @@ -2392,7 +2573,7 @@ void AsmPrinter::emitXXStructorList(const DataLayout &DL, const Constant *List, MCSection *OutputSection = (IsCtor ? 
Obj.getStaticCtorSection(S.Priority, KeySym) : Obj.getStaticDtorSection(S.Priority, KeySym)); - OutStreamer->SwitchSection(OutputSection); + OutStreamer->switchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) emitAlignment(Align); emitXXStructor(DL, S.Func); @@ -2423,8 +2604,8 @@ void AsmPrinter::emitModuleCommandLines(Module &M) { if (!NMD || !NMD->getNumOperands()) return; - OutStreamer->PushSection(); - OutStreamer->SwitchSection(CommandLine); + OutStreamer->pushSection(); + OutStreamer->switchSection(CommandLine); OutStreamer->emitZeros(1); for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { const MDNode *N = NMD->getOperand(i); @@ -2434,7 +2615,7 @@ void AsmPrinter::emitModuleCommandLines(Module &M) { OutStreamer->emitBytes(S->getString()); OutStreamer->emitZeros(1); } - OutStreamer->PopSection(); + OutStreamer->popSection(); } //===--------------------------------------------------------------------===// @@ -2471,7 +2652,7 @@ void AsmPrinter::emitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, unsigned Size, bool IsSectionRelative) const { if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { - OutStreamer->EmitCOFFSecRel32(Label, Offset); + OutStreamer->emitCOFFSecRel32(Label, Offset); if (Size > 4) OutStreamer->emitZeros(Size - 4); return; @@ -2541,6 +2722,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { llvm_unreachable("Unknown constant value to lower!"); } + // The constant expression opcodes are limited to those that are necessary + // to represent relocations on supported targets. Expressions involving only + // constant addresses are constant folded instead. switch (CE->getOpcode()) { case Instruction::AddrSpaceCast: { const Constant *Op = CE->getOperand(0); @@ -2658,34 +2842,17 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return RelocExpr; } } + + const MCExpr *LHS = lowerConstant(CE->getOperand(0)); + const MCExpr *RHS = lowerConstant(CE->getOperand(1)); + return MCBinaryExpr::createSub(LHS, RHS, Ctx); + break; } - // else fallthrough - LLVM_FALLTHROUGH; - - // The MC library also has a right-shift operator, but it isn't consistently - // signed or unsigned between different targets. 
- case Instruction::Add: - case Instruction::Mul: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::Shl: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { + + case Instruction::Add: { const MCExpr *LHS = lowerConstant(CE->getOperand(0)); const MCExpr *RHS = lowerConstant(CE->getOperand(1)); - switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx); - } + return MCBinaryExpr::createAdd(LHS, RHS, Ctx); } } } @@ -2719,7 +2886,7 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { assert(Size % 8 == 0); // Extend the element to take zero padding into account. - APInt Value = CI->getValue().zextOrSelf(Size); + APInt Value = CI->getValue().zext(Size); if (!Value.isSplat(8)) return -1; @@ -2768,8 +2935,8 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, if (isa<IntegerType>(CDS->getElementType())) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", - CDS->getElementAsInteger(i)); + AP.OutStreamer->getCommentOS() + << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer->emitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } @@ -2855,8 +3022,8 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { if (AP.isVerbose()) { SmallString<8> StrVal; APF.toString(StrVal); - ET->print(AP.OutStreamer->GetCommentOS()); - AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; + ET->print(AP.OutStreamer->getCommentOS()); + AP.OutStreamer->getCommentOS() << ' ' << StrVal << '\n'; } // Now iterate through the APInt chunks, emitting them in endian-correct @@ -3061,8 +3228,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, if (StoreSize <= 8) { if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", - CI->getZExtValue()); + AP.OutStreamer->getCommentOS() + << format("0x%" PRIx64 "\n", CI->getZExtValue()); AP.OutStreamer->emitIntValue(CI->getZExtValue(), StoreSize); } else { emitGlobalConstantLargeInt(CI, AP); @@ -3163,11 +3330,12 @@ MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { - return MMI->getAddrLabelSymbol(BA->getBasicBlock()); + return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol( + BA->getBasicBlock()); } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { - return MMI->getAddrLabelSymbol(BB); + return const_cast<AsmPrinter *>(this)->getAddrLabelSymbol(BB); } /// GetCPISymbol - Return the symbol for the specified constant pool entry. @@ -3272,7 +3440,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, // Otherwise, it is a loop header. Print out information about child and // parent loops. 
- raw_ostream &OS = AP.OutStreamer->GetCommentOS(); + raw_ostream &OS = AP.OutStreamer->getCommentOS(); PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); @@ -3308,7 +3476,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // entry block is always placed in the function section and is handled // separately. if (MBB.isBeginSection() && !MBB.isEntryBlock()) { - OutStreamer->SwitchSection( + OutStreamer->switchSection( getObjFileLowering().getSectionForMachineBasicBlock(MF->getFunction(), MBB, TM)); CurrentSectionBeginSym = MBB.getSymbol(); @@ -3326,7 +3494,7 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { // MBBs can have their address taken as part of CodeGen without having // their corresponding BB's address taken in IR if (BB && BB->hasAddressTaken()) - for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + for (MCSymbol *Sym : getAddrLabelSymbolToEmit(BB)) OutStreamer->emitLabel(Sym); } @@ -3334,9 +3502,9 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { if (isVerbose()) { if (BB) { if (BB->hasName()) { - BB->printAsOperand(OutStreamer->GetCommentOS(), + BB->printAsOperand(OutStreamer->getCommentOS(), /*PrintType=*/false, BB->getModule()); - OutStreamer->GetCommentOS() << '\n'; + OutStreamer->getCommentOS() << '\n'; } } @@ -3563,7 +3731,7 @@ void AsmPrinter::emitXRayTable() { // range of sleds associated with a function. auto &Ctx = OutContext; MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); - OutStreamer->SwitchSection(InstMap); + OutStreamer->switchSection(InstMap); OutStreamer->emitLabel(SledsStart); for (const auto &Sled : Sleds) { MCSymbol *Dot = Ctx.createTempSymbol(); @@ -3590,11 +3758,11 @@ void AsmPrinter::emitXRayTable() { // Each entry here will be 2 * word size aligned, as we're writing down two // pointers. This should work for both 32-bit and 64-bit platforms. 
if (FnSledIndex) { - OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->switchSection(FnSledIndex); OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo()); OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false); OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false); - OutStreamer->SwitchSection(PrevSection); + OutStreamer->switchSection(PrevSection); } Sleds.clear(); } @@ -3639,7 +3807,7 @@ void AsmPrinter::emitPatchableFunctionEntries() { } LinkedToSym = cast<MCSymbolELF>(CurrentFnSym); } - OutStreamer->SwitchSection(OutContext.getELFSection( + OutStreamer->switchSection(OutContext.getELFSection( "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym)); emitAlignment(Align(PointerSize)); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index fc127f4cf9da..719fec06aa33 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -19,14 +18,11 @@ #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include <cstdint> using namespace llvm; @@ -162,7 +158,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, if (MAI->needsDwarfSectionOffsetDirective()) { assert(!isDwarf64() && "emitting DWARF64 is not implemented for COFF targets"); - OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); + OutStreamer->emitCOFFSecRel32(Label, /*Offset=*/0); return; } @@ -277,6 +273,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpUndefined: OutStreamer->emitCFIUndefined(Inst.getRegister()); break; + case MCCFIInstruction::OpRememberState: + OutStreamer->emitCFIRememberState(); + break; + case MCCFIInstruction::OpRestoreState: + OutStreamer->emitCFIRestoreState(); + break; } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 5d0cadefdbf7..88c82cbc958b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -17,8 +17,8 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -26,9 +26,10 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" -#include 
"llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/ErrorHandling.h" @@ -115,7 +116,7 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); - Parser->setTargetParser(*TAP.get()); + Parser->setTargetParser(*TAP); // Enable lexing Masm binary and hex integer literals in intel inline // assembly. if (Dialect == InlineAsm::AD_Intel) @@ -398,9 +399,9 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { if (!RestrRegs.empty()) { std::string Msg = "inline asm clobber list contains reserved registers: "; ListSeparator LS; - for (const Register &RR : RestrRegs) { + for (const Register RR : RestrRegs) { Msg += LS; - Msg += TRI->getName(RR); + Msg += TRI->getRegAsmName(RR); } const char *Note = "Reserved registers on the clobber list may not be " diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 52c74713551c..701c0affdfa6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" -#include "DwarfExpression.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" @@ -29,7 +28,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -41,7 +39,6 @@ #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableCollection.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" @@ -58,11 +55,8 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/BinaryByteStream.h" -#include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -230,7 +224,7 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { break; } } - bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, + bool Success = OS.emitCVFileDirective(NextId, FullPath, ChecksumAsBytes, static_cast<unsigned>(CSKind)); (void)Success; assert(Success && ".cv_file directive failed"); @@ -251,7 +245,7 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt, .SiteFuncId; Site->SiteFuncId = NextFuncId++; - OS.EmitCVInlineSiteIdDirective( + OS.emitCVInlineSiteIdDirective( Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); Site->Inlinee = Inlinee; @@ -515,7 +509,7 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, if (!DL || DL == PrevInstLoc) return; - const DIScope *Scope = 
DL.get()->getScope(); + const DIScope *Scope = DL->getScope(); if (!Scope) return; @@ -614,18 +608,16 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { void CodeViewDebug::beginModule(Module *M) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. - NamedMDNode *CUs = M->getNamedMetadata("llvm.dbg.cu"); - if (!CUs || !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { + if (!MMI->hasDebugInfo() || + !Asm->getObjFileLowering().getCOFFDebugSymbolsSection()) { Asm = nullptr; return; } - // Tell MMI that we have and need debug info. - MMI->setDebugInfoAvailability(true); TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); // Get the current source language. - const MDNode *Node = *CUs->operands().begin(); + const MDNode *Node = *M->debug_compile_units_begin(); const auto *CU = cast<DICompileUnit>(Node); CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage()); @@ -727,7 +719,7 @@ void CodeViewDebug::emitTypeInformation() { return; // Start the .debug$T or .debug$P section with 0x4. - OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); + OS.switchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); emitCodeViewMagicVersion(); TypeTableCollection Table(TypeTable.records()); @@ -760,7 +752,7 @@ void CodeViewDebug::emitTypeGlobalHashes() { // Start the .debug$H section with the version and hash algorithm, currently // hardcoded to version 0, SHA1. - OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); + OS.switchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); OS.emitValueToAlignment(4); OS.AddComment("Magic"); @@ -826,6 +818,8 @@ static Version parseVersion(StringRef Name) { if (isdigit(C)) { V.Part[N] *= 10; V.Part[N] += C - '0'; + V.Part[N] = + std::min<int>(V.Part[N], std::numeric_limits<uint16_t>::max()); } else if (C == '.') { ++N; if (N >= 4) @@ -867,7 +861,6 @@ void CodeViewDebug::emitCompilerInformation() { Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); for (int N : FrontVer.Part) { - N = std::min<int>(N, std::numeric_limits<uint16_t>::max()); OS.emitInt16(N); } @@ -985,11 +978,11 @@ void CodeViewDebug::emitInlineeLinesSubsection() { assert(TypeIndices.count({SP, nullptr})); TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}]; - OS.AddBlankLine(); + OS.addBlankLine(); unsigned FileId = maybeRecordFile(SP->getFile()); OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); - OS.AddBlankLine(); + OS.addBlankLine(); OS.AddComment("Type index of inlined function"); OS.emitInt32(InlineeIdx.getIndex()); OS.AddComment("Offset into filechecksum table"); @@ -1051,7 +1044,7 @@ void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym); - OS.SwitchSection(DebugSec); + OS.switchSection(DebugSec); // Emit the magic version number if this is the first time we've switched to // this section. 
@@ -1080,9 +1073,9 @@ void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, OS.AddComment("PtrNext"); OS.emitInt32(0); OS.AddComment("Thunk section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Thunk section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); OS.AddComment("Ordinal"); @@ -1132,7 +1125,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // Emit FPO data, but only on 32-bit x86. No other platforms use it. if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) - OS.EmitCVFPOData(Fn); + OS.emitCVFPOData(Fn); // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); @@ -1160,9 +1153,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.AddComment("Function type index"); OS.emitInt32(getFuncIdForSubprogram(GV->getSubprogram()).getIndex()); OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.emitCOFFSecRel32(Fn, /*Offset=*/0); OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(Fn); + OS.emitCOFFSectionIndex(Fn); OS.AddComment("Flags"); OS.emitInt8(0); // Emit the function display name as a null-terminated string. @@ -1207,9 +1200,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, MCSymbol *Label = Annot.first; MDTuple *Strs = cast<MDTuple>(Annot.second); MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION); - OS.EmitCOFFSecRel32(Label, /*Offset=*/0); + OS.emitCOFFSecRel32(Label, /*Offset=*/0); // FIXME: Make sure we don't overflow the max record size. - OS.EmitCOFFSectionIndex(Label); + OS.emitCOFFSectionIndex(Label); OS.emitInt16(Strs->getNumOperands()); for (Metadata *MD : Strs->operands()) { // MDStrings are null terminated, so we can do EmitBytes and get the @@ -1227,9 +1220,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); - OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); + OS.emitCOFFSecRel32(BeginLabel, /*Offset=*/0); OS.AddComment("Call site section index"); - OS.EmitCOFFSectionIndex(BeginLabel); + OS.emitCOFFSectionIndex(BeginLabel); OS.AddComment("Call instruction length"); OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); OS.AddComment("Type index"); @@ -1249,9 +1242,9 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, OS.emitCVLinetableDirective(FI.FuncId, Fn, FI.End); } -CodeViewDebug::LocalVarDefRange +CodeViewDebug::LocalVarDef CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { - LocalVarDefRange DR; + LocalVarDef DR; DR.InMemory = -1; DR.DataOffset = Offset; assert(DR.DataOffset == Offset && "truncation"); @@ -1303,19 +1296,19 @@ void CodeViewDebug::collectVariableInfoFromMFTable( "Frame offsets with a scalable component are not supported"); // Calculate the label ranges. - LocalVarDefRange DefRange = + LocalVarDef DefRange = createDefRangeMem(CVReg, FrameOffset.getFixed() + ExprOffset); + LocalVariable Var; + Var.DIVar = VI.Var; + for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); End = End ? 
End : Asm->getFunctionEnd(); - DefRange.Ranges.emplace_back(Begin, End); + Var.DefRanges[DefRange].emplace_back(Begin, End); } - LocalVariable Var; - Var.DIVar = VI.Var; - Var.DefRanges.emplace_back(std::move(DefRange)); if (Deref) Var.UseReferenceType = true; @@ -1374,24 +1367,18 @@ void CodeViewDebug::calculateRanges( // We can only handle a register or an offseted load of a register. if (Location->Register == 0 || Location->LoadChain.size() > 1) continue; - { - LocalVarDefRange DR; - DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); - DR.InMemory = !Location->LoadChain.empty(); - DR.DataOffset = - !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; - if (Location->FragmentInfo) { - DR.IsSubfield = true; - DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; - } else { - DR.IsSubfield = false; - DR.StructOffset = 0; - } - if (Var.DefRanges.empty() || - Var.DefRanges.back().isDifferentLocation(DR)) { - Var.DefRanges.emplace_back(std::move(DR)); - } + LocalVarDef DR; + DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); + DR.InMemory = !Location->LoadChain.empty(); + DR.DataOffset = + !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; + if (Location->FragmentInfo) { + DR.IsSubfield = true; + DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; + } else { + DR.IsSubfield = false; + DR.StructOffset = 0; } // Compute the label range. @@ -1408,7 +1395,7 @@ void CodeViewDebug::calculateRanges( // If the last range end is our begin, just extend the last range. // Otherwise make a new range. SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R = - Var.DefRanges.back().Ranges; + Var.DefRanges[DR]; if (!R.empty() && R.back().second == Begin) R.back().second = End; else @@ -1525,7 +1512,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { // FIXME: Set GuardCfg when it is implemented. CurFn->FrameProcOpts = FPO; - OS.EmitCVFuncIdDirective(CurFn->FuncId); + OS.emitCVFuncIdDirective(CurFn->FuncId); // Find the end of the function prolog. First known non-DBG_VALUE and // non-frame setup location marks the beginning of the function body. @@ -1825,6 +1812,7 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { break; case dwarf::DW_ATE_UTF: switch (ByteSize) { + case 1: STK = SimpleTypeKind::Character8; break; case 2: STK = SimpleTypeKind::Character16; break; case 4: STK = SimpleTypeKind::Character32; break; } @@ -2820,7 +2808,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, // records and on disk formats are described in SymbolRecords.h. BytePrefix // should be big enough to hold all forms without memory allocation. 
SmallString<20> BytePrefix; - for (const LocalVarDefRange &DefRange : Var.DefRanges) { + for (const auto &Pair : Var.DefRanges) { + LocalVarDef DefRange = Pair.first; + const auto &Ranges = Pair.second; BytePrefix.clear(); if (DefRange.InMemory) { int Offset = DefRange.DataOffset; @@ -2844,7 +2834,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, : (EncFP == FI.EncodedLocalFramePtrReg))) { DefRangeFramePointerRelHeader DRHdr; DRHdr.Offset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { uint16_t RegRelFlags = 0; if (DefRange.IsSubfield) { @@ -2856,7 +2846,7 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = Reg; DRHdr.Flags = RegRelFlags; DRHdr.BasePointerOffset = Offset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } else { assert(DefRange.DataOffset == 0 && "unexpected offset into register"); @@ -2865,12 +2855,12 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; DRHdr.OffsetInParent = DefRange.StructOffset; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } else { DefRangeRegisterHeader DRHdr; DRHdr.Register = DefRange.CVRegister; DRHdr.MayHaveNoName = 0; - OS.emitCVDefRangeDirective(DefRange.Ranges, DRHdr); + OS.emitCVDefRangeDirective(Ranges, DRHdr); } } } @@ -2894,9 +2884,9 @@ void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, OS.AddComment("Code size"); OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size OS.AddComment("Function section relative address"); - OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset + OS.emitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset OS.AddComment("Function section index"); - OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol + OS.emitCOFFSectionIndex(FI.Begin); // Func Symbol OS.AddComment("Lexical block name"); emitNullTerminatedSymbolName(OS, Block.Name); // Name endSymbolRecord(RecordEnd); @@ -3181,6 +3171,11 @@ void CodeViewDebug::collectGlobalVariableInfo() { for (const auto *GVE : CU->getGlobalVariables()) { const DIGlobalVariable *DIGV = GVE->getVariable(); const DIExpression *DIE = GVE->getExpression(); + // Don't emit string literals in CodeView, as the only useful parts are + // generally the filename and line number, which isn't possible to output + // in CodeView. String literals should be the only unnamed GlobalVariable + // with debug info. + if (DIGV->getName().empty()) continue; if ((DIE->getNumElements() == 2) && (DIE->getElement(0) == dwarf::DW_OP_plus_uconst)) @@ -3380,10 +3375,10 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end()) // Use the offset seen while collecting info on globals. 
Offset = CVGlobalVariableOffsets[DIGV]; - OS.EmitCOFFSecRel32(GVSym, Offset); + OS.emitCOFFSecRel32(GVSym, Offset); OS.AddComment("Segment"); - OS.EmitCOFFSectionIndex(GVSym); + OS.emitCOFFSectionIndex(GVSym); OS.AddComment("Name"); const unsigned LengthOfDataRecord = 12; emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d1fc3cdccb20..16f0082723ed 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -50,18 +50,8 @@ class MachineFunction; /// Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { - MCStreamer &OS; - BumpPtrAllocator Allocator; - codeview::GlobalTypeTableBuilder TypeTable; - - /// Whether to emit type record hashes into .debug$H. - bool EmitDebugGlobalHashes = false; - - /// The codeview CPU type used by the translation unit. - codeview::CPUType TheCPU; - - /// Represents the most general definition range. - struct LocalVarDefRange { +public: + struct LocalVarDef { /// Indicates that variable data is stored in memory relative to the /// specified register. int InMemory : 1; @@ -79,23 +69,40 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// location containing the data. uint16_t CVRegister; - /// Compares all location fields. This includes all fields except the label - /// ranges. - bool isDifferentLocation(LocalVarDefRange &O) { - return InMemory != O.InMemory || DataOffset != O.DataOffset || - IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || - CVRegister != O.CVRegister; + uint64_t static toOpaqueValue(const LocalVarDef DR) { + uint64_t Val = 0; + std::memcpy(&Val, &DR, sizeof(Val)); + return Val; } - SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges; + LocalVarDef static createFromOpaqueValue(uint64_t Val) { + LocalVarDef DR; + std::memcpy(&DR, &Val, sizeof(Val)); + return DR; + } }; - static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); + static_assert(sizeof(uint64_t) == sizeof(LocalVarDef), ""); + +private: + MCStreamer &OS; + BumpPtrAllocator Allocator; + codeview::GlobalTypeTableBuilder TypeTable; + + /// Whether to emit type record hashes into .debug$H. + bool EmitDebugGlobalHashes = false; + + /// The codeview CPU type used by the translation unit. + codeview::CPUType TheCPU; + + static LocalVarDef createDefRangeMem(uint16_t CVRegister, int Offset); /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. 
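[editor's note] The header change above makes LocalVarDef hashable by reinterpreting its eight bytes as a uint64_t; std::memcpy keeps the round trip well-defined (unlike pointer type punning) and the static_assert pins the size. A self-contained illustration with a hypothetical key type of the same shape:

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical stand-in for LocalVarDef: trivially copyable and exactly the
// size of the word it round-trips through.
struct DefKey {
  int32_t DataOffset;
  uint16_t StructOffset;
  uint16_t CVRegister;
};
static_assert(sizeof(DefKey) == sizeof(uint64_t), "key must fill the word");

uint64_t toOpaque(DefKey K) {
  uint64_t V = 0;
  std::memcpy(&V, &K, sizeof(V)); // defined behavior, unlike a reinterpret_cast
  return V;
}

DefKey fromOpaque(uint64_t V) {
  DefKey K;
  std::memcpy(&K, &V, sizeof(K));
  return K;
}

int main() {
  DefKey K{-40, 8, 331};
  assert(fromOpaque(toOpaque(K)).CVRegister == 331);
  return 0;
}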
struct LocalVariable { const DILocalVariable *DIVar = nullptr; - SmallVector<LocalVarDefRange, 1> DefRanges; + MapVector<LocalVarDef, + SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1>> + DefRanges; bool UseReferenceType = false; }; @@ -493,6 +500,27 @@ public: void beginInstruction(const MachineInstr *MI) override; }; +template <> struct DenseMapInfo<CodeViewDebug::LocalVarDef> { + + static inline CodeViewDebug::LocalVarDef getEmptyKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL); + } + + static inline CodeViewDebug::LocalVarDef getTombstoneKey() { + return CodeViewDebug::LocalVarDef::createFromOpaqueValue(~0ULL - 1ULL); + } + + static unsigned getHashValue(const CodeViewDebug::LocalVarDef &DR) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(DR) * 37ULL; + } + + static bool isEqual(const CodeViewDebug::LocalVarDef &LHS, + const CodeViewDebug::LocalVarDef &RHS) { + return CodeViewDebug::LocalVarDef::toOpaqueValue(LHS) == + CodeViewDebug::LocalVarDef::toOpaqueValue(RHS); + } +}; + } // end namespace llvm #endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 396322c4979d..617ddbd66e4e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -13,21 +13,15 @@ #include "llvm/CodeGen/DIE.h" #include "DwarfCompileUnit.h" #include "DwarfDebug.h" -#include "DwarfUnit.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -170,7 +164,7 @@ DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) { void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const { if (!Abbreviations.empty()) { // Start the debug abbrev section. - AP->OutStreamer->SwitchSection(Section); + AP->OutStreamer->switchSection(Section); AP->emitDwarfAbbrevs(Abbreviations); } } @@ -204,6 +198,7 @@ const DIE *DIE::getUnitDie() const { const DIE *p = this; while (p) { if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_skeleton_unit || p->getTag() == dwarf::DW_TAG_type_unit) return p; p = p->getParent(); @@ -378,7 +373,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const { case dwarf::DW_FORM_flag_present: // Emit something to keep the lines and comments in sync. // FIXME: Is there a better way to do this? 
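[editor's note] The DenseMapInfo specialization above is what allows LocalVarDef to serve as a MapVector/DenseMap key. Every such specialization must supply two reserved sentinel keys that can never occur as real values, a hash, and equality; the ~0 and ~0-1 sentinels here rely on those bit patterns never being produced by toOpaqueValue for a real definition. A compile-checkable sketch of the contract with a toy key (assumes the LLVM ADT headers):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include <cstdint>

struct ToyKey {
  uint64_t V;
};

namespace llvm {
// The full DenseMapInfo contract: sentinels, hash, equality.
template <> struct DenseMapInfo<ToyKey> {
  static ToyKey getEmptyKey() { return {~0ULL}; }
  static ToyKey getTombstoneKey() { return {~0ULL - 1ULL}; }
  static unsigned getHashValue(const ToyKey &K) {
    return unsigned(K.V * 37ULL); // same cheap multiplicative mix as the patch
  }
  static bool isEqual(const ToyKey &L, const ToyKey &R) { return L.V == R.V; }
};
} // namespace llvm

int main() {
  llvm::DenseMap<ToyKey, int> M;
  M[ToyKey{42}] = 1; // compiles only because the specialization above exists
  return M[ToyKey{42}] == 1 ? 0 : 1;
}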
- Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); return; case dwarf::DW_FORM_flag: case dwarf::DW_FORM_ref1: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index e175854f7b93..5da50d7aab9f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -19,7 +19,6 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index dd795079ac1a..1358f4d25990 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -204,7 +203,7 @@ void DbgValueHistoryMap::trimLocationRanges( if (auto R = intersects(StartMI, EndMI, ScopeRanges, Ordering)) { // Adjust ScopeRanges to exclude ranges which subsequent location ranges // cannot possibly intersect. - ScopeRanges = ArrayRef<InsnRange>(R.getValue(), ScopeRanges.end()); + ScopeRanges = ArrayRef<InsnRange>(*R, ScopeRanges.end()); } else { // If the location range does not intersect any scope range then the // DBG_VALUE which opened this location range is usless, mark it for diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 18fc46c74eb4..660a064687d3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/DebugHandlerBase.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 63343d2519f9..5f187acf13dc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -11,23 +11,13 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include 
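[editor's note] The DbgEntityHistoryCalculator hunk above (R.getValue() becoming *R) is part of a wider LLVM migration from Optional::getValue() to operator*; both forms require the Optional to be engaged. A minimal sketch:

#include "llvm/ADT/Optional.h"

// Both spellings dereference an engaged Optional; the codebase is converging
// on the terser operator* form used in the patch.
int twiceOrZero(llvm::Optional<int> O) {
  return O ? *O * 2 : 0; // formerly often written as O.getValue() * 2
}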
"llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -53,7 +43,7 @@ void DwarfCFIExceptionBase::endFragment() { DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} -DwarfCFIException::~DwarfCFIException() {} +DwarfCFIException::~DwarfCFIException() = default; /// endModule - Emit all exception information that should come after the /// content. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 5913c687db48..b3f99d346faa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -67,13 +66,13 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, /// DW_FORM_addr or DW_FORM_GNU_addr_index. void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { + if ((Skeleton || !DD->useSplitDwarf()) && Label) + DD->addArangeLabel(SymbolCU(this, Label)); + // Don't use the address pool in non-fission or in the skeleton unit itself. if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) return addLocalLabelAddress(Die, Attribute, Label); - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - bool UseAddrOffsetFormOrExpressions = DD->useAddrOffsetForm() || DD->useAddrOffsetExpressions(); @@ -109,9 +108,6 @@ void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - - if (Label) addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIELabel(Label)); else addAttribute(Die, Attribute, dwarf::DW_FORM_addr, DIEInteger(0)); @@ -169,7 +165,9 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( } else { DeclContext = GV->getScope(); // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); + StringRef DisplayName = GV->getDisplayName(); + if (!DisplayName.empty()) + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); if (GTy) addType(*VariableDIE, GTy); @@ -303,8 +301,11 @@ void DwarfCompileUnit::addLocationAttribute( DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address : dwarf::DW_OP_form_tls_address); } - } else if (Asm->TM.getRelocationModel() == Reloc::RWPI || - Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) { + } else if ((Asm->TM.getRelocationModel() == Reloc::RWPI || + Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) && + !Asm->getObjFileLowering() + .getKindForGlobal(Global, Asm->TM) + .isReadOnly()) { auto FormAndOp = GetPointerSizedFormAndOp(); // Constant addUInt(*Loc, dwarf::DW_FORM_data1, FormAndOp.Op); @@ -505,7 +506,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { // FIXME: when writing dwo, we need to avoid relocations. Probably // the "right" solution is to treat globals the way func and data // symbols are (with entries in .debug_addr). - // For now, since we only ever use index 0, this should work as-is. + // For now, since we only ever use index 0, this should work as-is. 
addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index); } addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index f2e1f6346803..61412cde34c8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -25,7 +25,6 @@ #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/Casting.h" -#include <algorithm> #include <cassert> #include <cstdint> #include <memory> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 609b568f28be..866338a949f3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -45,14 +45,11 @@ #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -360,7 +357,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A) DebuggerTuning = Asm->TM.Options.DebuggerTuning; else if (IsDarwin) DebuggerTuning = DebuggerKind::LLDB; - else if (TT.isPS4CPU()) + else if (TT.isPS()) DebuggerTuning = DebuggerKind::SCE; else if (TT.isOSAIX()) DebuggerTuning = DebuggerKind::DBX; @@ -2315,7 +2312,7 @@ void DwarfDebug::emitStringOffsetsTableHeader() { template <typename AccelTableT> void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName) { - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); // Emit the full data. emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol()); @@ -2434,12 +2431,12 @@ void DwarfDebug::emitDebugPubSections() { bool GnuStyle = TheU->getCUNode()->getNameTableKind() == DICompileUnit::DebugNameTableKind::GNU; - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection()); emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames()); - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( GnuStyle ? 
Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection()); emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes()); @@ -2849,7 +2846,7 @@ void DwarfDebug::emitDebugLocImpl(MCSection *Sec) { if (DebugLocs.getLists().empty()) return; - Asm->OutStreamer->SwitchSection(Sec); + Asm->OutStreamer->switchSection(Sec); MCSymbol *TableEnd = nullptr; if (getDwarfVersion() >= 5) @@ -2880,7 +2877,7 @@ void DwarfDebug::emitDebugLocDWO() { } for (const auto &List : DebugLocs.getLists()) { - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); Asm->OutStreamer->emitLabel(List.Label); @@ -2953,8 +2950,8 @@ void DwarfDebug::emitDebugARanges() { // Sort the symbols by offset within the section. llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + unsigned IA = A.Sym ? Asm->OutStreamer->getSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->getSymbolOrder(B.Sym) : 0; // Symbols with no order assigned should be placed at the end. // (e.g. section end labels) @@ -2987,7 +2984,7 @@ void DwarfDebug::emitDebugARanges() { } // Start the dwarf aranges section. - Asm->OutStreamer->SwitchSection( + Asm->OutStreamer->switchSection( Asm->getObjFileLowering().getDwarfARangesSection()); unsigned PtrSize = Asm->MAI->getCodePointerSize(); @@ -3045,15 +3042,22 @@ void DwarfDebug::emitDebugARanges() { for (const ArangeSpan &Span : List) { Asm->emitLabelReference(Span.Start, PtrSize); - // Calculate the size as being from the span start to it's end. - if (Span.End) { + // Calculate the size as being from the span start to its end. + // + // If the size is zero, then round it up to one byte. The DWARF + // specification requires that entries in this table have nonzero + // lengths. + auto SizeRef = SymSize.find(Span.Start); + if ((SizeRef == SymSize.end() || SizeRef->second != 0) && Span.End) { Asm->emitLabelDifference(Span.End, Span.Start, PtrSize); } else { // For symbols without an end marker (e.g. common), we // write a single arange entry containing just that one symbol. 
- uint64_t Size = SymSize[Span.Start]; - if (Size == 0) + uint64_t Size; + if (SizeRef == SymSize.end() || SizeRef->second == 0) Size = 1; + else + Size = SizeRef->second; Asm->OutStreamer->emitIntValue(Size, PtrSize); } @@ -3087,7 +3091,7 @@ void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section return !Pair.second->getCUNode()->isDebugDirectivesOnly(); })); - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); MCSymbol *TableEnd = nullptr; if (getDwarfVersion() >= 5) @@ -3239,7 +3243,7 @@ void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { DIMacroNodeArray Macros = CUNode->getMacros(); if (Macros.empty()) continue; - Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->switchSection(Section); Asm->OutStreamer->emitLabel(U.getMacroLabelBegin()); if (UseDebugMacroSection) emitMacroHeader(Asm, *this, U, getDwarfVersion()); @@ -3447,22 +3451,6 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, CU.addDIETypeSignature(RefDie, Signature); } -DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) - : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { - DD->TypeUnitsUnderConstruction.clear(); - DD->AddrPool.resetUsedFlag(); -} - -DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { - DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); - DD->AddrPool.resetUsedFlag(AddrPoolUsed); -} - -DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { - return NonTypeUnitContext(this); -} - // Add the Name along with its companion DIE to the appropriate accelerator // table (for AccelTableKind::Dwarf it's always AccelDebugNames, for // AccelTableKind::Apple, we use the table we got as an argument). If @@ -3555,6 +3543,6 @@ Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const { // An MD5 checksum is 16 bytes. std::string ChecksumString = fromHex(Checksum->Value); MD5::MD5Result CKMem; - std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); + std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data()); return CKMem; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4e1a1b1e068d..31e4081b7141 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,14 +14,13 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #include "AddressPool.h" -#include "DebugLocStream.h" #include "DebugLocEntry.h" +#include "DebugLocStream.h" #include "DwarfFile.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -31,7 +30,6 @@ #include "llvm/CodeGen/AccelTable.h" #include "llvm/CodeGen/DbgEntityHistoryCalculator.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" @@ -80,7 +78,7 @@ private: public: DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID) : Entity(N), InlinedAt(IA), SubclassID(ID) {} - virtual ~DbgEntity() {} + virtual ~DbgEntity() = default; /// Accessors. 
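[editor's note] The emitDebugARanges hunk above enforces the DWARF rule that entries in the address-range table have nonzero length. A standalone version of the resulting size selection (the SymSize map type is assumed from context):

#include "llvm/ADT/DenseMap.h"
#include <cstdint>

namespace llvm {
class MCSymbol;
}

// Pick the arange entry length: an unknown or zero symbol size is rounded up
// to one byte, since DWARF forbids zero-length entries in this table.
uint64_t arangeEntrySize(
    const llvm::DenseMap<const llvm::MCSymbol *, uint64_t> &SymSize,
    const llvm::MCSymbol *Start) {
  auto It = SymSize.find(Start);
  if (It == SymSize.end() || It->second == 0)
    return 1;
  return It->second;
}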
/// @{ @@ -667,19 +665,6 @@ public: void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - class NonTypeUnitContext { - DwarfDebug *DD; - decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; - bool AddrPoolUsed; - friend class DwarfDebug; - NonTypeUnitContext(DwarfDebug *DD); - public: - NonTypeUnitContext(NonTypeUnitContext&&) = default; - ~NonTypeUnitContext(); - }; - - NonTypeUnitContext enterNonTypeUnitContext(); - /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index fe438102ee98..1c21d5ee8bb1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -329,7 +329,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return false; } - assert(DwarfRegs.size() == 1); + // TODO: We should not give up here but the following code needs to be changed + // to deal with multiple (sub)registers first. + if (DwarfRegs.size() > 1) { + LLVM_DEBUG(dbgs() << "TODO: giving up on debug information due to " + "multi-register usage.\n"); + DwarfRegs.clear(); + LocationKind = Unknown; + return false; + } + auto Reg = DwarfRegs[0]; bool FBReg = isFrameRegister(TRI, MachineReg); int SignedOffset = 0; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index a67d0f032cf6..a497aa07284e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -12,9 +12,7 @@ #include "DwarfUnit.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Metadata.h" #include "llvm/MC/MCStreamer.h" -#include <algorithm> #include <cstdint> using namespace llvm; @@ -47,7 +45,7 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { if (llvm::empty(TheU->getUnitDie().values())) return; - Asm->OutStreamer->SwitchSection(S); + Asm->OutStreamer->switchSection(S); TheU->emitHeader(UseOffsets); Asm->emitDwarfDIE(TheU->getUnitDie()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index a876f8ccace9..67b72f0b455d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -39,7 +39,7 @@ DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, StringRef Str) { auto &MapEntry = getEntryImpl(Asm, Str); - return EntryRef(MapEntry, false); + return EntryRef(MapEntry); } DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, @@ -47,7 +47,7 @@ DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, auto &MapEntry = getEntryImpl(Asm, Str); if (!MapEntry.getValue().isIndexed()) MapEntry.getValue().Index = NumIndexedStrings++; - return EntryRef(MapEntry, true); + return EntryRef(MapEntry); } void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, @@ -55,7 +55,7 @@ void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSymbol *StartSym) { if 
(getNumIndexedStrings() == 0) return; - Asm.OutStreamer->SwitchSection(Section); + Asm.OutStreamer->switchSection(Section); unsigned EntrySize = Asm.getDwarfOffsetByteSize(); // We are emitting the header for a contribution to the string offsets // table. The header consists of an entry with the contribution's @@ -78,7 +78,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, return; // Start the dwarf str section. - Asm.OutStreamer->SwitchSection(StrSection); + Asm.OutStreamer->switchSection(StrSection); // Get all of the string pool entries and sort them by their offset. SmallVector<const StringMapEntry<EntryTy> *, 64> Entries; @@ -117,7 +117,7 @@ void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, Entries[Entry.getValue().Index] = &Entry; } - Asm.OutStreamer->SwitchSection(OffsetSection); + Asm.OutStreamer->switchSection(OffsetSection); unsigned size = Asm.getDwarfOffsetByteSize(); for (const auto &Entry : Entries) if (UseRelativeOffsets) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 5a2bd479f277..81238b0fe0d2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -17,12 +17,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -32,9 +28,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include <cassert> #include <cstdint> @@ -380,6 +374,8 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, CU = getUnitDie().getUnit(); if (!EntryCU) EntryCU = getUnitDie().getUnit(); + assert(EntryCU == CU || !DD->useSplitDwarf() || DD->shareAcrossDWOCUs() || + !static_cast<const DwarfUnit*>(CU)->isDwoUnit()); addAttribute(Die, Attribute, EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, Entry); @@ -596,10 +592,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, // Skip updating the accelerator tables since this is not the full type. if (MDString *TypeId = CTy->getRawIdentifier()) DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); - else { - auto X = DD->enterNonTypeUnitContext(); + else finishNonUnitTypeDIE(TyDIE, CTy); - } return &TyDIE; } constructTypeDIE(TyDIE, CTy); @@ -805,7 +799,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { // or reference types. 
if (DTy->getDWARFAddressSpace()) addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4, - DTy->getDWARFAddressSpace().getValue()); + *DTy->getDWARFAddressSpace()); } void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { @@ -1350,6 +1344,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (SP->isRecursive()) addFlag(SPDie, dwarf::DW_AT_recursive); + if (!SP->getTargetFuncName().empty()) + addString(SPDie, dwarf::DW_AT_trampoline, SP->getTargetFuncName()); + if (DD->getDwarfVersion() >= 5 && SP->isDeleted()) addFlag(SPDie, dwarf::DW_AT_deleted); } @@ -1442,7 +1439,8 @@ DIE *DwarfUnit::getIndexTyDie() { addString(*IndexTyDie, dwarf::DW_AT_name, Name); addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_unsigned); + dwarf::getArrayIndexTypeEncoding( + (dwarf::SourceLanguage)getLanguage())); DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0); return IndexTyDie; } @@ -1847,11 +1845,5 @@ void DwarfUnit::addRnglistsBase() { } void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { - addFlag(D, dwarf::DW_AT_declaration); - StringRef Name = CTy->getName(); - if (!Name.empty()) - addString(D, dwarf::DW_AT_name, Name); - if (Name.startswith("_STN") || !Name.contains('<')) - addTemplateParams(D, CTy->getTemplateParams()); - getCU().createTypeDIE(CTy); + DD->getAddressPool().resetUsedFlag(true); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 39f40b172c1b..31644959bdca 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -458,7 +457,7 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Sometimes we want not to emit the data into separate section (e.g. ARM // EHABI). In this case LSDASection will be NULL. if (LSDASection) - Asm->OutStreamer->SwitchSection(LSDASection); + Asm->OutStreamer->switchSection(LSDASection); Asm->emitAlignment(Align(4)); // Emit the LSDA. @@ -806,7 +805,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = TypeInfos.size(); } @@ -821,7 +820,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { // Emit the Exception Specifications. 
if (VerboseAsm && !FilterIds.empty()) { Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); - Asm->OutStreamer->AddBlankLine(); + Asm->OutStreamer->addBlankLine(); Entry = 0; } for (std::vector<unsigned>::const_iterator diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 70777f07fc6c..62fd15d89512 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -23,7 +23,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -46,9 +45,8 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, unsigned IntPtrSize = M.getDataLayout().getPointerSize(); // Put this in a custom .note section. - OS.SwitchSection( - AP.getObjFileLowering().getContext().getELFSection(".note.gc", - ELF::SHT_PROGBITS, 0)); + OS.switchSection(AP.getObjFileLowering().getContext().getELFSection( + ".note.gc", ELF::SHT_PROGBITS, 0)); // For each function... for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3ade262d9af2..74fa30ab321b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -72,10 +72,10 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_begin"); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_begin"); } @@ -99,16 +99,16 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) { unsigned IntPtrSize = M.getDataLayout().getPointerSize(); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(M, AP, "code_end"); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "data_end"); // FIXME: Why does ocaml emit this?? 
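[editor's note] The ErlangGCPrinter hunk above reflows a section switch, but the underlying pattern is worth spelling out: ask MCContext for (creating if needed) a named ELF section, then point the streamer at it so subsequent emissions land there. A standalone sketch using the renamed lowercase streamer API:

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"

// Create-or-get a custom .note section and make it current; every following
// emit*() call on the streamer targets it until the next switch.
void switchToGCNote(llvm::MCContext &Ctx, llvm::MCStreamer &OS) {
  OS.switchSection(
      Ctx.getELFSection(".note.gc", llvm::ELF::SHT_PROGBITS, 0));
}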
AP.OutStreamer->emitIntValue(0, IntPtrSize); - AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + AP.OutStreamer->switchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(M, AP, "frametable"); int NumDescriptors = 0; @@ -147,7 +147,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, AP.OutStreamer->AddComment("live roots for " + Twine(FI->getFunction().getName())); - AP.OutStreamer->AddBlankLine(); + AP.OutStreamer->addBlankLine(); for (GCFunctionInfo::iterator J = FI->begin(), JE = FI->end(); J != JE; ++J) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp index bab187f46535..135eabc34838 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp @@ -13,7 +13,7 @@ #include "PseudoProbePrinter.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/MC/MCStreamer.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp index a17a2ca2790e..a514ff161cee 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "WasmException.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h index f06de786bd76..2abbe37cb6d9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -15,9 +15,12 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H #include "EHStreamer.h" -#include "llvm/CodeGen/AsmPrinter.h" namespace llvm { +class AsmPrinter; +class MachineFunction; +struct LandingPadInfo; +template <typename T> class SmallVectorImpl; class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer { public: diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index ad8432343a60..5d813b72c0b7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -15,11 +15,8 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Metadata.h" -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -29,7 +26,7 @@ using namespace llvm; WinCFGuard::WinCFGuard(AsmPrinter *A) : Asm(A) {} -WinCFGuard::~WinCFGuard() {} +WinCFGuard::~WinCFGuard() = default; void WinCFGuard::endFunction(const MachineFunction *MF) { @@ -110,19 +107,19 @@ void WinCFGuard::endModule() { // Emit the symbol index 
of each GFIDs entry to form the .gfids section. auto &OS = *Asm->OutStreamer; - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); for (const MCSymbol *S : GFIDsEntries) - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); // Emit the symbol index of each GIATs entry to form the .giats section. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGIATsSection()); for (const MCSymbol *S : GIATsEntries) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } // Emit the symbol index of each longjmp target to form the .gljmp section. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); for (const MCSymbol *S : LongjmpTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index ef57031c7294..c3ca9c92bf71 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -23,19 +23,13 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -46,7 +40,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) { isThumb = Asm->TM.getTargetTriple().isThumb(); } -WinException::~WinException() {} +WinException::~WinException() = default; /// endModule - Emit all exception information that should come after the /// content. @@ -55,13 +49,13 @@ void WinException::endModule() { const Module *M = MMI->getModule(); for (const Function &F : *M) if (F.hasFnAttribute("safeseh")) - OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); + OS.emitCOFFSafeSEH(Asm->getSymbol(&F)); if (M->getModuleFlag("ehcontguard") && !EHContTargets.empty()) { // Emit the symbol index of each ehcont target. - OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); + OS.switchSection(Asm->OutContext.getObjectFileInfo()->getGEHContSection()); for (const MCSymbol *S : EHContTargets) { - OS.EmitCOFFSymbolIndex(S); + OS.emitCOFFSymbolIndex(S); } } } @@ -122,7 +116,7 @@ void WinException::beginFunction(const MachineFunction *MF) { void WinException::markFunctionEnd() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } /// endFunction - Gather and emit post-function exception information. @@ -151,12 +145,12 @@ void WinException::endFunction(const MachineFunction *MF) { return; if (shouldEmitPersonality || shouldEmitLSDA) { - Asm->OutStreamer->PushSection(); + Asm->OutStreamer->pushSection(); // Just switch sections to the right xdata section. 
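[editor's note] The WinCFGuard::endModule hunk above repeats one pattern three times (.gfids, .giats, .gljmp): switch to the metadata section, then emit a COFF symbol-table index per symbol. A hypothetical helper (not in the patch) capturing that pattern:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCStreamer.h"

// Emit one symbol-table index per entry into the given Control Flow Guard
// metadata section; the linker consumes these indices to build the tables.
void emitSymbolIndexSection(llvm::MCStreamer &OS, llvm::MCSection *Sec,
                            llvm::ArrayRef<const llvm::MCSymbol *> Syms) {
  OS.switchSection(Sec);
  for (const llvm::MCSymbol *S : Syms)
    OS.emitCOFFSymbolIndex(S);
}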
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( Asm->OutStreamer->getCurrentSectionOnly()); - Asm->OutStreamer->SwitchSection(XData); + Asm->OutStreamer->switchSection(XData); // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. @@ -171,7 +165,7 @@ void WinException::endFunction(const MachineFunction *MF) { else emitExceptionTable(); - Asm->OutStreamer->PopSection(); + Asm->OutStreamer->popSection(); } if (!MF->getCatchretTargets().empty()) { @@ -211,11 +205,11 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, Sym = getMCSymbolForMBB(Asm, &MBB); // Describe our funclet symbol as a function with internal linkage. - Asm->OutStreamer->BeginCOFFSymbolDef(Sym); - Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); - Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + Asm->OutStreamer->beginCOFFSymbolDef(Sym); + Asm->OutStreamer->emitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + Asm->OutStreamer->emitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT); - Asm->OutStreamer->EndCOFFSymbolDef(); + Asm->OutStreamer->endCOFFSymbolDef(); // We want our funclet's entry point to be aligned such that no nops will be // present after the label. @@ -229,7 +223,7 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // Mark 'Sym' as starting our funclet. if (shouldEmitMoves || shouldEmitPersonality) { CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); - Asm->OutStreamer->EmitWinCFIStartProc(Sym); + Asm->OutStreamer->emitWinCFIStartProc(Sym); } if (shouldEmitPersonality) { @@ -248,15 +242,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB, // inliner doesn't allow inlining them, this isn't a major problem in // practice. if (!CurrentFuncletEntry->isCleanupFuncletEntry()) - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + Asm->OutStreamer->emitWinEHHandler(PersHandlerSym, true, true); } } void WinException::endFunclet() { if (isAArch64 && CurrentFuncletEntry && (shouldEmitMoves || shouldEmitPersonality)) { - Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + Asm->OutStreamer->switchSection(CurrentFuncletTextSection); + Asm->OutStreamer->emitWinCFIFuncletOrFuncEnd(); } endFuncletImpl(); } @@ -276,7 +270,7 @@ void WinException::endFuncletImpl() { if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && !CurrentFuncletEntry->isCleanupFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. @@ -287,14 +281,14 @@ void WinException::endFuncletImpl() { } else if (Per == EHPersonality::MSVC_TableSEH && MF->hasEHFunclets() && !CurrentFuncletEntry->isEHFuncletEntry()) { // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // If this is the parent function in Win64 SEH, emit the LSDA immediately // following .seh_handlerdata. emitCSpecificHandlerTable(MF); } else if (shouldEmitPersonality || shouldEmitLSDA) { // Emit an UNWIND_INFO struct describing the prologue. 
- Asm->OutStreamer->EmitWinEHHandlerData(); + Asm->OutStreamer->emitWinEHHandlerData(); // In these cases, no further info is written to the .xdata section // right here, but is written by e.g. emitExceptionTable in endFunction() // above. @@ -307,8 +301,8 @@ void WinException::endFuncletImpl() { // Switch back to the funclet start .text section now that we are done // writing to .xdata, and emit an .seh_endproc directive to mark the end of // the function. - Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); - Asm->OutStreamer->EmitWinCFIEndProc(); + Asm->OutStreamer->switchSection(CurrentFuncletTextSection); + Asm->OutStreamer->emitWinCFIEndProc(); } // Let's make sure we don't try to end the same funclet twice. @@ -699,7 +693,12 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { } int UnwindHelpOffset = 0; - if (Asm->MAI->usesWindowsCFI()) + // TODO: The check for UnwindHelpFrameIdx against max() below (and the + // second check further below) can be removed if MS C++ unwinding is + // implemented for ARM, when test/CodeGen/ARM/Windows/wineh-basic.ll + // passes without the check. + if (Asm->MAI->usesWindowsCFI() && + FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max()) UnwindHelpOffset = getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); @@ -761,7 +760,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { AddComment("IPToStateXData"); OS.emitValue(create32bitRef(IPToStateXData), 4); - if (Asm->MAI->usesWindowsCFI()) { + if (Asm->MAI->usesWindowsCFI() && + FuncInfo.UnwindHelpFrameIdx != std::numeric_limits<int>::max()) { AddComment("UnwindHelp"); OS.emitInt32(UnwindHelpOffset); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp index 4838f6da750d..5ce6fbb5f647 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/AtomicExpandUtils.h" @@ -47,6 +47,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/LowerAtomic.h" #include <cassert> #include <cstdint> #include <iterator> @@ -57,71 +58,72 @@ using namespace llvm; namespace { - class AtomicExpand: public FunctionPass { - const TargetLowering *TLI = nullptr; +class AtomicExpand : public FunctionPass { + const TargetLowering *TLI = nullptr; - public: - static char ID; // Pass identification, replacement for typeid +public: + static char ID; // Pass identification, replacement for typeid - AtomicExpand() : FunctionPass(ID) { - initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); - } + AtomicExpand() : FunctionPass(ID) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; - - private: - bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); - IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); - LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); - bool tryExpandAtomicLoad(LoadInst *LI); - bool expandAtomicLoadToLL(LoadInst *LI); - bool expandAtomicLoadToCmpXchg(LoadInst *LI); - StoreInst 
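[editor's note] The WinException hunk above guards two uses of UnwindHelpFrameIdx behind a sentinel test: per its TODO, INT_MAX marks "no UnwindHelp stack slot was allocated", which currently happens on targets such as ARM where MSVC C++ unwinding is not yet implemented. The convention, as a tiny sketch:

#include <limits>

// Sentinel convention assumed by the patch: the frame index stays at INT_MAX
// until an UnwindHelp stack slot is actually allocated for the function.
bool hasUnwindHelpSlot(int UnwindHelpFrameIdx) {
  return UnwindHelpFrameIdx != std::numeric_limits<int>::max();
}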
*convertAtomicStoreToIntegerType(StoreInst *SI); - bool expandAtomicStore(StoreInst *SI); - bool tryExpandAtomicRMW(AtomicRMWInst *AI); - AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); - Value * - insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, - Align AddrAlign, AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); - void expandAtomicOpToLLSC( - Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign, - AtomicOrdering MemOpOrder, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); - void expandPartwordAtomicRMW( - AtomicRMWInst *I, - TargetLoweringBase::AtomicExpansionKind ExpansionKind); - AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); - bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); - void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); - void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); - - AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); - static Value *insertRMWCmpXchgLoop( - IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign, - AtomicOrdering MemOpOrder, SyncScope::ID SSID, - function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, - CreateCmpXchgInstFun CreateCmpXchg); - bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - bool isIdempotentRMW(AtomicRMWInst *RMWI); - bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); - - bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, - Value *PointerOperand, Value *ValueOperand, - Value *CASExpected, AtomicOrdering Ordering, - AtomicOrdering Ordering2, - ArrayRef<RTLIB::Libcall> Libcalls); - void expandAtomicLoadToLibcall(LoadInst *LI); - void expandAtomicStoreToLibcall(StoreInst *LI); - void expandAtomicRMWToLibcall(AtomicRMWInst *I); - void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); - - friend bool - llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg); - }; + bool runOnFunction(Function &F) override; + +private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); + IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); + LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); + bool tryExpandAtomicLoad(LoadInst *LI); + bool expandAtomicLoadToLL(LoadInst *LI); + bool expandAtomicLoadToCmpXchg(LoadInst *LI); + StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); + bool tryExpandAtomicStore(StoreInst *SI); + void expandAtomicStore(StoreInst *SI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); + AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI); + Value * + insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + Align AddrAlign, AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); + void + expandAtomicOpToLLSC(Instruction *I, Type *ResultTy, Value *Addr, + Align AddrAlign, AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); + void expandPartwordAtomicRMW( + AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind); + AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI); + bool expandPartwordCmpXchg(AtomicCmpXchgInst *I); + void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI); + void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI); + + AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); + static Value * + insertRMWCmpXchgLoop(IRBuilder<> &Builder, Type *ResultType, Value *Addr, + 
Align AddrAlign, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, + CreateCmpXchgInstFun CreateCmpXchg); + bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + bool isIdempotentRMW(AtomicRMWInst *RMWI); + bool simplifyIdempotentRMW(AtomicRMWInst *RMWI); + + bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment, + Value *PointerOperand, Value *ValueOperand, + Value *CASExpected, AtomicOrdering Ordering, + AtomicOrdering Ordering2, + ArrayRef<RTLIB::Libcall> Libcalls); + void expandAtomicLoadToLibcall(LoadInst *LI); + void expandAtomicStoreToLibcall(StoreInst *LI); + void expandAtomicRMWToLibcall(AtomicRMWInst *I); + void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); + + friend bool + llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg); +}; } // end anonymous namespace @@ -129,8 +131,8 @@ char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", - false, false) +INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, + false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } @@ -252,7 +254,8 @@ bool AtomicExpand::runOnFunction(Function &F) { } if (LI) { - if (LI->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicLoadInIR(LI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. LI = convertAtomicLoadToIntegerType(LI); @@ -262,7 +265,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange |= tryExpandAtomicLoad(LI); } else if (SI) { - if (SI->getValueOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicStoreInIR(SI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. SI = convertAtomicStoreToIntegerType(SI); @@ -271,8 +275,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } - if (TLI->shouldExpandAtomicStoreInIR(SI)) - MadeChange |= expandAtomicStore(SI); + if (tryExpandAtomicStore(SI)) + MadeChange = true; } else if (RMWI) { // There are two different ways of expanding RMW instructions: // - into a load if it is idempotent @@ -283,8 +287,8 @@ bool AtomicExpand::runOnFunction(Function &F) { MadeChange = true; } else { AtomicRMWInst::BinOp Op = RMWI->getOperation(); - if (Op == AtomicRMWInst::Xchg && - RMWI->getValOperand()->getType()->isFloatingPointTy()) { + if (TLI->shouldCastAtomicRMWIInIR(RMWI) == + TargetLoweringBase::AtomicExpansionKind::CastToInteger) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. RMWI = convertAtomicXchgToIntegerType(RMWI); @@ -308,7 +312,7 @@ bool AtomicExpand::runOnFunction(Function &F) { // extend convertCmpXchgToInteger for floating point too. assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && "unimplemented - floating point not legal at IR level"); - if (CASI->getCompareOperand()->getType()->isPointerTy() ) { + if (CASI->getCompareOperand()->getType()->isPointerTy()) { // TODO: add a TLI hook to control this so that each target can // convert to lowering the original type one at a time. 
CASI = convertCmpXchgToIntegerType(CASI); @@ -351,14 +355,12 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, /// convertAtomicStoreToIntegerType for background. LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); - Type *NewTy = getCorrespondingIntegerType(LI->getType(), - M->getDataLayout()); + Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout()); IRBuilder<> Builder(LI); Value *Addr = LI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); auto *NewLI = Builder.CreateLoad(NewTy, NewAddr); @@ -385,7 +387,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); - Value *NewVal = Builder.CreateBitCast(Val, NewTy); + Value *NewVal = Val->getType()->isPointerTy() + ? Builder.CreatePtrToInt(Val, NewTy) + : Builder.CreateBitCast(Val, NewTy); auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal, @@ -393,7 +397,9 @@ AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) { NewRMWI->setVolatile(RMWI->isVolatile()); LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n"); - Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType()); + Value *NewRVal = RMWI->getType()->isPointerTy() + ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType()) + : Builder.CreateBitCast(NewRMWI, RMWI->getType()); RMWI->replaceAllUsesWith(NewRVal); RMWI->eraseFromParent(); return NewRMWI; @@ -413,11 +419,29 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { return expandAtomicLoadToLL(LI); case TargetLoweringBase::AtomicExpansionKind::CmpXChg: return expandAtomicLoadToCmpXchg(LI); + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + LI->setAtomic(AtomicOrdering::NotAtomic); + return true; default: llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); } } +bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) { + switch (TLI->shouldExpandAtomicStoreInIR(SI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::Expand: + expandAtomicStore(SI); + return true; + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + SI->setAtomic(AtomicOrdering::NotAtomic); + return true; + default: + llvm_unreachable("Unhandled case in tryExpandAtomicStore"); + } +} + bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { IRBuilder<> Builder(LI); @@ -471,8 +495,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); Value *Addr = SI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); @@ -484,7 +507,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { return NewSI; } -bool AtomicExpand::expandAtomicStore(StoreInst *SI) { +void AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. 
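[editor's note] In the convertAtomicXchgToIntegerType hunk above, pointer payloads now go through ptrtoint/inttoptr because LLVM IR does not permit a bitcast between pointers and integers; floating-point payloads keep using bitcast. The cast-selection rule, condensed into a helper (name and factoring are mine):

#include "llvm/IR/IRBuilder.h"

// Cast an atomic xchg payload to its integer carrier type: pointers need
// ptrtoint (pointer<->integer bitcast is invalid IR); anything else reaching
// this path (floats) can be bitcast, since the sizes match.
llvm::Value *castPayloadToInt(llvm::IRBuilder<> &B, llvm::Value *V,
                              llvm::IntegerType *ITy) {
  return V->getType()->isPointerTy() ? B.CreatePtrToInt(V, ITy)
                                     : B.CreateBitCast(V, ITy);
}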
So we replace them with an // atomic swap, which can be implemented for example as a ldrex/strex on ARM @@ -498,7 +521,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { SI->eraseFromParent(); // Now we have an appropriate swap instruction, lower it as usual. - return tryExpandAtomicRMW(AI); + tryExpandAtomicRMW(AI); } static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, @@ -508,6 +531,7 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, Type *OrigTy = NewVal->getType(); // This code can go away when cmpxchg supports FP types. + assert(!OrigTy->isPointerTy()); bool NeedBitcast = OrigTy->isFloatingPointTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); @@ -527,47 +551,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy); } -/// Emit IR to implement the given atomicrmw operation on values in registers, -/// returning the new value. -static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, - Value *Loaded, Value *Inc) { - Value *NewVal; - switch (Op) { - case AtomicRMWInst::Xchg: - return Inc; - case AtomicRMWInst::Add: - return Builder.CreateAdd(Loaded, Inc, "new"); - case AtomicRMWInst::Sub: - return Builder.CreateSub(Loaded, Inc, "new"); - case AtomicRMWInst::And: - return Builder.CreateAnd(Loaded, Inc, "new"); - case AtomicRMWInst::Nand: - return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); - case AtomicRMWInst::Or: - return Builder.CreateOr(Loaded, Inc, "new"); - case AtomicRMWInst::Xor: - return Builder.CreateXor(Loaded, Inc, "new"); - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, Inc); - return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); - case AtomicRMWInst::FAdd: - return Builder.CreateFAdd(Loaded, Inc, "new"); - case AtomicRMWInst::FSub: - return Builder.CreateFSub(Loaded, Inc, "new"); - default: - llvm_unreachable("Unknown atomic op"); - } -} - bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { LLVMContext &Ctx = AI->getModule()->getContext(); TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); @@ -582,8 +565,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { TargetLoweringBase::AtomicExpansionKind::LLSC); } else { auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { - return performAtomicOp(AI->getOperation(), Builder, Loaded, - AI->getValOperand()); + return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); }; expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), PerformOp); @@ -621,6 +604,12 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { expandAtomicRMWToMaskedIntrinsic(AI); return true; } + case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: { + TLI->emitBitTestAtomicRMWIntrinsic(AI); + return true; + } + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + return lowerAtomicRMWInst(AI); default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } @@ -703,7 +692,7 @@ static PartwordMaskValues
createMaskInstrs(IRBuilder<> &Builder, Instruction *I, PMV.AlignedAddr = Addr; PMV.AlignedAddrAlignment = AddrAlign; PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0); - PMV.Mask = ConstantInt::get(PMV.ValueType, ~0); + PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true); return PMV; } @@ -787,7 +776,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, case AtomicRMWInst::Sub: case AtomicRMWInst::Nand: { // The other arithmetic ops need to be masked into place. - Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc); + Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc); Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask); Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask); Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked); @@ -801,7 +790,7 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op, // truncate down to the original size, and expand out again after // doing the operation. Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV); - Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc); + Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc); Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV); return FinalVal; } @@ -840,9 +829,8 @@ void AtomicExpand::expandPartwordAtomicRMW( Value *OldResult; if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, - PMV.AlignedAddrAlignment, MemOpOrder, - SSID, PerformPartwordOp, - createCmpXchgInstFun); + PMV.AlignedAddrAlignment, MemOpOrder, SSID, + PerformPartwordOp, createCmpXchgInstFun); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, @@ -1106,7 +1094,7 @@ Value *AtomicExpand::insertRMWLLSCLoop( // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place). @@ -1135,7 +1123,8 @@ Value *AtomicExpand::insertRMWLLSCLoop( /// IR. As a migration step, we convert back to what used to be the standard /// way to represent a pointer cmpxchg so that we can update backends one by /// one.
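The pointer handling added to convertAtomicXchgToIntegerType above, and the pointer-cmpxchg conversion that follows, share one rule: pointer payloads must round-trip through ptrtoint/inttoptr, because a direct bitcast between pointer and integer types is invalid IR. A minimal standalone sketch of that cast discipline, using only IRBuilder calls that appear in this patch (the helper names are illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Cast an atomic operand to an integer of the same width; only non-pointer
// payloads (e.g. floats) may use a plain bitcast.
static Value *castToIntForAtomic(IRBuilder<> &B, Value *V, IntegerType *ITy) {
  return V->getType()->isPointerTy() ? B.CreatePtrToInt(V, ITy)
                                     : B.CreateBitCast(V, ITy);
}

// Cast the integer result of the widened atomic back to the original type.
static Value *castFromIntForAtomic(IRBuilder<> &B, Value *V, Type *OrigTy) {
  return OrigTy->isPointerTy() ? B.CreateIntToPtr(V, OrigTy)
                               : B.CreateBitCast(V, OrigTy);
}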
-AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { +AtomicCmpXchgInst * +AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) { auto *M = CI->getModule(); Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(), M->getDataLayout()); @@ -1143,8 +1132,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * IRBuilder<> Builder(CI); Value *Addr = CI->getPointerOperand(); - Type *PT = PointerType::get(NewTy, - Addr->getType()->getPointerAddressSpace()); + Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace()); Value *NewAddr = Builder.CreateBitCast(Addr, PT); Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy); @@ -1305,9 +1293,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB); Value *NewValueInsert = insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV); - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewValueInsert, PMV.AlignedAddr, - MemOpOrder); + Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert, + PMV.AlignedAddr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; @@ -1418,27 +1405,27 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { return true; } -bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { +bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) { auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); - if(!C) + if (!C) return false; AtomicRMWInst::BinOp Op = RMWI->getOperation(); - switch(Op) { - case AtomicRMWInst::Add: - case AtomicRMWInst::Sub: - case AtomicRMWInst::Or: - case AtomicRMWInst::Xor: - return C->isZero(); - case AtomicRMWInst::And: - return C->isMinusOne(); - // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... - default: - return false; + switch (Op) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + return C->isZero(); + case AtomicRMWInst::And: + return C->isMinusOne(); + // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... 
+ default: + return false; } } -bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { +bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { tryExpandAtomicLoad(ResultingLoad); return true; @@ -1524,6 +1511,8 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) { case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: expandAtomicCmpXchgToMaskedIntrinsic(CI); return true; + case TargetLoweringBase::AtomicExpansionKind::NotAtomic: + return lowerAtomicCmpXchgInst(CI); } } @@ -1535,8 +1524,8 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(), AI->getOrdering(), AI->getSyncScopeID(), [&](IRBuilder<> &Builder, Value *Loaded) { - return performAtomicOp(AI->getOperation(), Builder, Loaded, - AI->getValOperand()); + return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); }, CreateCmpXchg); @@ -1738,11 +1727,21 @@ bool AtomicExpand::expandAtomicOpToLibcall( RTLIB::Libcall RTLibType; if (UseSizedLibcall) { switch (Size) { - case 1: RTLibType = Libcalls[1]; break; - case 2: RTLibType = Libcalls[2]; break; - case 4: RTLibType = Libcalls[3]; break; - case 8: RTLibType = Libcalls[4]; break; - case 16: RTLibType = Libcalls[5]; break; + case 1: + RTLibType = Libcalls[1]; + break; + case 2: + RTLibType = Libcalls[2]; + break; + case 4: + RTLibType = Libcalls[3]; + break; + case 8: + RTLibType = Libcalls[4]; + break; + case 16: + RTLibType = Libcalls[5]; + break; } } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) { RTLibType = Libcalls[0]; @@ -1806,8 +1805,8 @@ bool AtomicExpand::expandAtomicOpToLibcall( // that property, we'd need to extend this mechanism to support AS-specific // families of atomic intrinsics. 
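The switch over Size in expandAtomicOpToLibcall above encodes a fixed array convention: Libcalls[0] holds the generic (unsized) libcall, and slots 1 through 5 hold the size-specialized variants for 1, 2, 4, 8 and 16 bytes. A compact equivalent of that mapping, assuming the same array layout and this era's llvm/CodeGen/RuntimeLibcalls.h header (hypothetical helper, sketch only):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static RTLIB::Libcall pickSizedAtomicLibcall(ArrayRef<RTLIB::Libcall> Libcalls,
                                             unsigned Size) {
  // Assumed layout: slot 0 is the generic call; slots 1..5 cover the
  // power-of-two sizes 1, 2, 4, 8, 16, i.e. slot = log2(Size) + 1.
  assert(isPowerOf2_32(Size) && Size <= 16 && "no sized libcall variant");
  return Libcalls[Log2_32(Size) + 1];
}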
auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace(); - Value *PtrVal = Builder.CreateBitCast(PointerOperand, - Type::getInt8PtrTy(Ctx, PtrTypeAS)); + Value *PtrVal = + Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS)); PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx)); Args.push_back(PtrVal); @@ -1815,11 +1814,10 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (CASExpected) { AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType()); AllocaCASExpected->setAlignment(AllocaAlignment); - unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); + unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace(); - AllocaCASExpected_i8 = - Builder.CreateBitCast(AllocaCASExpected, - Type::getInt8PtrTy(Ctx, AllocaAS)); + AllocaCASExpected_i8 = Builder.CreateBitCast( + AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64); Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment); Args.push_back(AllocaCASExpected_i8); @@ -1846,9 +1844,9 @@ bool AtomicExpand::expandAtomicOpToLibcall( if (!CASExpected && HasResult && !UseSizedLibcall) { AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); AllocaResult->setAlignment(AllocaAlignment); - unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); + unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace(); AllocaResult_i8 = - Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); + Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS)); Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); Args.push_back(AllocaResult_i8); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp index c1901bc46d72..f05f5b9f9947 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp @@ -60,7 +60,7 @@ // Basic Block Labels // ================== // -// With -fbasic-block-sections=labels, we emit the offsets of BB addresses of +// With -fbasic-block-sections=labels, we encode the offsets of BB addresses of // every function into the .llvm_bb_addr_map section. Along with the function // symbols, this allows for mapping of virtual addresses in PMU profiles back to // the corresponding basic blocks. This logic is implemented in AsmPrinter. 
This @@ -69,26 +69,17 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/LineIterator.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Target/TargetMachine.h" -using llvm::SmallSet; -using llvm::SmallVector; -using llvm::StringMap; -using llvm::StringRef; using namespace llvm; // Placing the cold clusters in a separate section mitigates against poor @@ -108,41 +99,11 @@ cl::opt<bool> BBSectionsDetectSourceDrift( namespace { -// This struct represents the cluster information for a machine basic block. -struct BBClusterInfo { - // MachineBasicBlock ID. - unsigned MBBNumber; - // Cluster ID this basic block belongs to. - unsigned ClusterID; - // Position of basic block within the cluster. - unsigned PositionInCluster; -}; - -using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>; - class BasicBlockSections : public MachineFunctionPass { public: static char ID; - // This contains the basic-block-sections profile. - const MemoryBuffer *MBuf = nullptr; - - // This encapsulates the BB cluster information for the whole program. - // - // For every function name, it contains the cluster information for (all or - // some of) its basic blocks. The cluster information for every basic block - // includes its cluster ID along with the position of the basic block in that - // cluster. - ProgramBBClusterInfoMapTy ProgramBBClusterInfo; - - // Some functions have alias names. We use this map to find the main alias - // name for which we have mapping in ProgramBBClusterInfo. - StringMap<StringRef> FuncAliasMap; - - BasicBlockSections(const MemoryBuffer *Buf) - : MachineFunctionPass(ID), MBuf(Buf) { - initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); - }; + BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; BasicBlockSections() : MachineFunctionPass(ID) { initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry()); @@ -154,9 +115,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; - /// Read profiles of basic blocks if available here. - bool doInitialization(Module &M) override; - /// Identify basic blocks that need separate sections and prepare to emit them /// accordingly. bool runOnMachineFunction(MachineFunction &MF) override; @@ -206,21 +164,18 @@ static void updateBranches( // This function provides the BBCluster information associated with a function. // Returns true if a valid association exists and false otherwise. -static bool getBBClusterInfoForFunction( - const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap, - const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, +bool getBBClusterInfoForFunction( + const MachineFunction &MF, + BasicBlockSectionsProfileReader *BBSectionsProfileReader, std::vector<Optional<BBClusterInfo>> &V) { - // Get the main alias name for the function. - auto FuncName = MF.getName(); - auto R = FuncAliasMap.find(FuncName); - StringRef AliasName = R == FuncAliasMap.end() ? 
FuncName : R->second; // Find the associated cluster information. - auto P = ProgramBBClusterInfo.find(AliasName); - if (P == ProgramBBClusterInfo.end()) + std::pair<bool, SmallVector<BBClusterInfo, 4>> P = + BBSectionsProfileReader->getBBClusterInfoForFunction(MF.getName()); + if (!P.first) return false; - if (P->second.empty()) { + if (P.second.empty()) { // This indicates that sections are desired for all basic blocks of this // function. We clear the BBClusterInfo vector to denote this. V.clear(); @@ -228,7 +183,7 @@ static bool getBBClusterInfoForFunction( } V.resize(MF.getNumBlockIDs()); - for (auto bbClusterInfo : P->second) { + for (auto bbClusterInfo : P.second) { // Bail out if the cluster information contains invalid MBB numbers. if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs()) return false; @@ -266,7 +221,7 @@ assignSections(MachineFunction &MF, // set every basic block's section ID equal to its number (basic block // id). This further ensures that basic blocks are ordered canonically. MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())}); - } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue()) + } else if (FuncBBClusterInfo[MBB.getNumber()]) MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID); else { // BB goes into the special cold section if it is not specified in the @@ -279,9 +234,8 @@ assignSections(MachineFunction &MF, // If we already have one cluster containing eh_pads, this must be updated // to ExceptionSectionID. Otherwise, we set it equal to the current // section ID. - EHPadsSectionID = EHPadsSectionID.hasValue() - ? MBBSectionID::ExceptionSectionID - : MBB.getSectionID(); + EHPadsSectionID = EHPadsSectionID ? MBBSectionID::ExceptionSectionID + : MBB.getSectionID(); } } @@ -290,7 +244,7 @@ assignSections(MachineFunction &MF, if (EHPadsSectionID == MBBSectionID::ExceptionSectionID) for (auto &MBB : MF) if (MBB.isEHPad()) - MBB.setSectionID(EHPadsSectionID.getValue()); + MBB.setSectionID(*EHPadsSectionID); } void llvm::sortBasicBlocksAndUpdateBranches( @@ -377,9 +331,11 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } + BBSectionsProfileReader = &getAnalysis<BasicBlockSectionsProfileReader>(); + std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo; if (BBSectionsType == BasicBlockSection::List && - !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo, + !getBBClusterInfoForFunction(MF, BBSectionsProfileReader, FuncBBClusterInfo)) return true; MF.setBBSectionsType(BBSectionsType); @@ -427,107 +383,12 @@ bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) { return true; } -// Basic Block Sections can be enabled for a subset of machine basic blocks. -// This is done by passing a file containing names of functions for which basic -// block sections are desired. Additionally, machine basic block ids of the -// functions can also be specified for a finer granularity. Moreover, a cluster -// of basic blocks could be assigned to the same section.
-// A file with basic block sections for all of function main and three blocks -// for function foo (of which 1 and 2 are placed in a cluster) looks like this: -// ---------------------------- -// list.txt: -// !main -// !foo -// !!1 2 -// !!4 -static Error getBBClusterInfo(const MemoryBuffer *MBuf, - ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, - StringMap<StringRef> &FuncAliasMap) { - assert(MBuf); - line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); - - auto invalidProfileError = [&](auto Message) { - return make_error<StringError>( - Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + - Twine(LineIt.line_number()) + ": " + Message), - inconvertibleErrorCode()); - }; - - auto FI = ProgramBBClusterInfo.end(); - - // Current cluster ID corresponding to this function. - unsigned CurrentCluster = 0; - // Current position in the current cluster. - unsigned CurrentPosition = 0; - - // Temporary set to ensure every basic block ID appears once in the clusters - // of a function. - SmallSet<unsigned, 4> FuncBBIDs; - - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef S(*LineIt); - if (S[0] == '@') - continue; - // Check for the leading "!" - if (!S.consume_front("!") || S.empty()) - break; - // Check for second "!" which indicates a cluster of basic blocks. - if (S.consume_front("!")) { - if (FI == ProgramBBClusterInfo.end()) - return invalidProfileError( - "Cluster list does not follow a function name specifier."); - SmallVector<StringRef, 4> BBIndexes; - S.split(BBIndexes, ' '); - // Reset current cluster position. - CurrentPosition = 0; - for (auto BBIndexStr : BBIndexes) { - unsigned long long BBIndex; - if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) - return invalidProfileError(Twine("Unsigned integer expected: '") + - BBIndexStr + "'."); - if (!FuncBBIDs.insert(BBIndex).second) - return invalidProfileError(Twine("Duplicate basic block id found '") + - BBIndexStr + "'."); - if (!BBIndex && CurrentPosition) - return invalidProfileError("Entry BB (0) does not begin a cluster."); - - FI->second.emplace_back(BBClusterInfo{ - ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); - } - CurrentCluster++; - } else { // This is a function name specifier. - // Function aliases are separated using '/'. We use the first function - // name for the cluster info mapping and delegate all other aliases to - // this one. - SmallVector<StringRef, 4> Aliases; - S.split(Aliases, '/'); - for (size_t i = 1; i < Aliases.size(); ++i) - FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); - - // Prepare for parsing clusters of this function name. - // Start a new cluster map for this function name. 
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; - CurrentCluster = 0; - FuncBBIDs.clear(); - } - } - return Error::success(); -} - -bool BasicBlockSections::doInitialization(Module &M) { - if (!MBuf) - return false; - if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) - report_fatal_error(std::move(Err)); - return false; -} - void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired<BasicBlockSectionsProfileReader>(); MachineFunctionPass::getAnalysisUsage(AU); } -MachineFunctionPass * -llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) { - return new BasicBlockSections(Buf); +MachineFunctionPass *llvm::createBasicBlockSectionsPass() { + return new BasicBlockSections(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp new file mode 100644 index 000000000000..c2acf115998b --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -0,0 +1,144 @@ +//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of the basic block sections profile reader pass. It parses +// and stores the basic block sections profile file (which is specified via the +// `-basic-block-sections` flag). +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; + +char BasicBlockSectionsProfileReader::ID = 0; +INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader", + "Reads and parses a basic block sections profile.", false, + false) + +bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const { + return getBBClusterInfoForFunction(FuncName).first; +} + +std::pair<bool, SmallVector<BBClusterInfo>> +BasicBlockSectionsProfileReader::getBBClusterInfoForFunction( + StringRef FuncName) const { + std::pair<bool, SmallVector<BBClusterInfo>> cluster_info(false, {}); + auto R = ProgramBBClusterInfo.find(getAliasName(FuncName)); + if (R != ProgramBBClusterInfo.end()) { + cluster_info.second = R->second; + cluster_info.first = true; + } + return cluster_info; +} + +// Basic Block Sections can be enabled for a subset of machine basic blocks. +// This is done by passing a file containing names of functions for which basic +// block sections are desired. Additionally, machine basic block ids of the +// functions can also be specified for a finer granularity. Moreover, a cluster +// of basic blocks could be assigned to the same section. 
+// A file with basic block sections for all of function main and three blocks +// for function foo (of which 1 and 2 are placed in a cluster) looks like this: +// ---------------------------- +// list.txt: +// !main +// !foo +// !!1 2 +// !!4 +static Error getBBClusterInfo(const MemoryBuffer *MBuf, + ProgramBBClusterInfoMapTy &ProgramBBClusterInfo, + StringMap<StringRef> &FuncAliasMap) { + assert(MBuf); + line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); + + auto invalidProfileError = [&](auto Message) { + return make_error<StringError>( + Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " + + Twine(LineIt.line_number()) + ": " + Message), + inconvertibleErrorCode()); + }; + + auto FI = ProgramBBClusterInfo.end(); + + // Current cluster ID corresponding to this function. + unsigned CurrentCluster = 0; + // Current position in the current cluster. + unsigned CurrentPosition = 0; + + // Temporary set to ensure every basic block ID appears once in the clusters + // of a function. + SmallSet<unsigned, 4> FuncBBIDs; + + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef S(*LineIt); + if (S[0] == '@') + continue; + // Check for the leading "!" + if (!S.consume_front("!") || S.empty()) + break; + // Check for second "!" which indicates a cluster of basic blocks. + if (S.consume_front("!")) { + if (FI == ProgramBBClusterInfo.end()) + return invalidProfileError( + "Cluster list does not follow a function name specifier."); + SmallVector<StringRef, 4> BBIndexes; + S.split(BBIndexes, ' '); + // Reset current cluster position. + CurrentPosition = 0; + for (auto BBIndexStr : BBIndexes) { + unsigned long long BBIndex; + if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex)) + return invalidProfileError(Twine("Unsigned integer expected: '") + + BBIndexStr + "'."); + if (!FuncBBIDs.insert(BBIndex).second) + return invalidProfileError(Twine("Duplicate basic block id found '") + + BBIndexStr + "'."); + if (!BBIndex && CurrentPosition) + return invalidProfileError("Entry BB (0) does not begin a cluster."); + + FI->second.emplace_back(BBClusterInfo{ + ((unsigned)BBIndex), CurrentCluster, CurrentPosition++}); + } + CurrentCluster++; + } else { // This is a function name specifier. + // Function aliases are separated using '/'. We use the first function + // name for the cluster info mapping and delegate all other aliases to + // this one. + SmallVector<StringRef, 4> Aliases; + S.split(Aliases, '/'); + for (size_t i = 1; i < Aliases.size(); ++i) + FuncAliasMap.try_emplace(Aliases[i], Aliases.front()); + + // Prepare for parsing clusters of this function name. + // Start a new cluster map for this function name. 
+ FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first; + CurrentCluster = 0; + FuncBBIDs.clear(); + } + } + return Error::success(); +} + +void BasicBlockSectionsProfileReader::initializePass() { + if (!MBuf) + return; + if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap)) + report_fatal_error(std::move(Err)); +} + +ImmutablePass * +llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) { + return new BasicBlockSectionsProfileReader(Buf); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp index 0ff67f7ca00a..07be03d2dab9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -32,11 +33,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSizeOpts.h" -#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -105,6 +104,11 @@ namespace { AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } }; } // end anonymous namespace diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h index 95d5dcfbbd0f..d0b6ed5ebe05 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchFolding.h @@ -14,7 +14,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/Compiler.h" -#include <cstdint> #include <vector> namespace llvm { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp index eda0f37fdeb7..29508f8f35a6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp index 558700bd9b3b..57170c58db14 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -19,11 +19,13 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include 
"llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp new file mode 100644 index 000000000000..837dbd77d073 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIFixup.cpp @@ -0,0 +1,225 @@ +//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +// This pass inserts the necessary instructions to adjust for the inconsistency +// of the call-frame information caused by final machine basic block layout. +// The pass relies in constraints LLVM imposes on the placement of +// save/restore points (cf. ShrinkWrap): +// * there is a single basic block, containing the function prologue +// * possibly multiple epilogue blocks, where each epilogue block is +// complete and self-contained, i.e. CSR restore instructions (and the +// corresponding CFI instructions are not split across two or more blocks. +// * prologue and epilogue blocks are outside of any loops +// Thus, during execution, at the beginning and at the end of each basic block +// the function can be in one of two states: +// - "has a call frame", if the function has executed the prologue, and +// has not executed any epilogue +// - "does not have a call frame", if the function has not executed the +// prologue, or has executed an epilogue +// which can be computed by a single RPO traversal. + +// In order to accommodate backends which do not generate unwind info in +// epilogues we compute an additional property "strong no call frame on entry", +// which is set for the entry point of the function and for every block +// reachable from the entry along a path that does not execute the prologue. If +// this property holds, it takes precedence over the "has a call frame" +// property. + +// From the point of view of the unwind tables, the "has/does not have call +// frame" state at beginning of each block is determined by the state at the end +// of the previous block, in layout order. Where these states differ, we insert +// compensating CFI instructions, which come in two flavours: + +// - CFI instructions, which reset the unwind table state to the initial one. +// This is done by a target specific hook and is expected to be trivial +// to implement, for example it could be: +// .cfi_def_cfa <sp>, 0 +// .cfi_same_value <rN> +// .cfi_same_value <rN-1> +// ... +// where <rN> are the callee-saved registers. +// - CFI instructions, which reset the unwind table state to the one +// created by the function prologue. These are +// .cfi_restore_state +// .cfi_remember_state +// In this case we also insert a `.cfi_remember_state` after the last CFI +// instruction in the function prologue. 
+// +// Known limitations: +// * the pass cannot handle an epilogue preceding the prologue in the basic +// block layout +// * the pass does not handle functions where SP is used as a frame pointer and +// SP adjustments up and down are done in different basic blocks (TODO) +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CFIFixup.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "cfi-fixup" + +char CFIFixup::ID = 0; + +INITIALIZE_PASS(CFIFixup, "cfi-fixup", + "Insert CFI remember/restore state instructions", false, false) +FunctionPass *llvm::createCFIFixup() { return new CFIFixup(); } + +static bool isPrologueCFIInstruction(const MachineInstr &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameSetup); +} + +static bool containsPrologue(const MachineBasicBlock &MBB) { + return llvm::any_of(MBB.instrs(), isPrologueCFIInstruction); +} + +static bool containsEpilogue(const MachineBasicBlock &MBB) { + return llvm::any_of(llvm::reverse(MBB), [](const auto &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameDestroy); + }); +} + +bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { + const TargetFrameLowering &TFL = *MF.getSubtarget().getFrameLowering(); + if (!TFL.enableCFIFixup(MF)) + return false; + + const unsigned NumBlocks = MF.getNumBlockIDs(); + if (NumBlocks < 2) + return false; + + struct BlockFlags { + bool Reachable : 1; + bool StrongNoFrameOnEntry : 1; + bool HasFrameOnEntry : 1; + bool HasFrameOnExit : 1; + }; + SmallVector<BlockFlags, 32> BlockInfo(NumBlocks, {false, false, false, false}); + BlockInfo[0].Reachable = true; + BlockInfo[0].StrongNoFrameOnEntry = true; + + // Compute the presence/absence of frame at each basic block. + MachineBasicBlock *PrologueBlock = nullptr; + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + for (MachineBasicBlock *MBB : RPOT) { + BlockFlags &Info = BlockInfo[MBB->getNumber()]; + + // Set to true if the current block contains the prologue or the epilogue, + // respectively. + bool HasPrologue = false; + bool HasEpilogue = false; + + if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) { + PrologueBlock = MBB; + HasPrologue = true; + } + + if (Info.HasFrameOnEntry || HasPrologue) + HasEpilogue = containsEpilogue(*MBB); + + // If the function has a call frame at the entry of the current block or the + // current block contains the prologue, then the function has a call frame + // at the exit of the block, unless the block contains the epilogue. + Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue; + + // Set the successors' state on entry. + for (MachineBasicBlock *Succ : MBB->successors()) { + BlockFlags &SuccInfo = BlockInfo[Succ->getNumber()]; + SuccInfo.Reachable = true; + SuccInfo.StrongNoFrameOnEntry |= + Info.StrongNoFrameOnEntry && !HasPrologue; + SuccInfo.HasFrameOnEntry = Info.HasFrameOnExit; + } + } + + if (!PrologueBlock) + return false; + + // Walk the blocks of the function in "physical" order. 
+ // Every block inherits the frame state (as recorded in the unwind tables) + // of the previous block. If the intended frame state is different, insert + // compensating CFI instructions. + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool Change = false; + // `InsertPt` always points to the point in a preceding block where we have to + // insert a `.cfi_remember_state`, in the case that the current block needs a + // `.cfi_restore_state`. + MachineBasicBlock *InsertMBB = PrologueBlock; + MachineBasicBlock::iterator InsertPt = PrologueBlock->begin(); + for (MachineInstr &MI : *PrologueBlock) + if (isPrologueCFIInstruction(MI)) + InsertPt = std::next(MI.getIterator()); + + assert(InsertPt != PrologueBlock->begin() && + "Inconsistent notion of \"prologue block\""); + + // No point starting before the prologue block. + // TODO: the unwind tables will still be incorrect if an epilogue physically + // precedes the prologue. + MachineFunction::iterator CurrBB = std::next(PrologueBlock->getIterator()); + bool HasFrame = BlockInfo[PrologueBlock->getNumber()].HasFrameOnExit; + while (CurrBB != MF.end()) { + const BlockFlags &Info = BlockInfo[CurrBB->getNumber()]; + if (!Info.Reachable) { + ++CurrBB; + continue; + } + +#ifndef NDEBUG + if (!Info.StrongNoFrameOnEntry) { + for (auto *Pred : CurrBB->predecessors()) { + BlockFlags &PredInfo = BlockInfo[Pred->getNumber()]; + assert((!PredInfo.Reachable || + Info.HasFrameOnEntry == PredInfo.HasFrameOnExit) && + "Inconsistent call frame state"); + } + } +#endif + if (!Info.StrongNoFrameOnEntry && Info.HasFrameOnEntry && !HasFrame) { + // Reset to the "after prologue" state. + + // Insert a `.cfi_remember_state` into the last block known to have a + // stack frame. + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + BuildMI(*InsertMBB, InsertPt, DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + // Insert a `.cfi_restore_state` at the beginning of the current block. + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + InsertPt = BuildMI(*CurrBB, CurrBB->begin(), DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + ++InsertPt; + InsertMBB = &*CurrBB; + Change = true; + } else if ((Info.StrongNoFrameOnEntry || !Info.HasFrameOnEntry) && + HasFrame) { + // Reset to the state upon function entry.
+ TFL.resetCFIToInitialState(*CurrBB); + Change = true; + } + + HasFrame = Info.HasFrameOnExit; + ++CurrBB; + } + + return Change; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp index de173a9dfd62..42523c47a671 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -19,16 +19,14 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SetOperations.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCDwarf.h" using namespace llvm; static cl::opt<bool> VerifyCFI("verify-cfiinstrs", diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp index 84a0e4142bb6..689e49978d43 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -145,11 +145,6 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &LI) { LI.setWeight(Weight); } -float VirtRegAuxInfo::futureWeight(LiveInterval &LI, SlotIndex Start, - SlotIndex End) { - return weightCalcHelper(LI, &Start, &End); -} - float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, SlotIndex *End) { MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp index c9246f6e8754..f74ff30ab2e1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CallingConvLower.cpp @@ -14,16 +14,14 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> using namespace llvm; @@ -72,15 +70,9 @@ bool CCState::IsShadowAllocatedReg(MCRegister Reg) const { if (!isAllocated(Reg)) return false; - for (auto const &ValAssign : Locs) { - if (ValAssign.isRegLoc()) { - for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true); - AI.isValid(); ++AI) { - if (*AI == Reg) - return false; - } - } - } + for (auto const &ValAssign : Locs) + if (ValAssign.isRegLoc() && TRI.regsOverlap(ValAssign.getLocReg(), Reg)) + return false; return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp index 7c236a9785d8..5050395fbc0f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); 
initializeCFGuardLongjmpPass(Registry); + initializeCFIFixupPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCheckDebugMachineModulePass(Registry); initializeCodeGenPreparePass(Registry); @@ -50,6 +51,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeIndirectBrExpandPassPass(Registry); initializeInterleavedLoadCombinePass(Registry); initializeInterleavedAccessPass(Registry); + initializeJMCInstrumenterPass(Registry); initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); @@ -57,6 +59,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); + initializeLowerGlobalDtorsLegacyPassPass(Registry); initializeLowerIntrinsicsPass(Registry); initializeMIRAddFSDiscriminatorsPass(Registry); initializeMIRCanonicalizerPass(Registry); @@ -104,6 +107,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeRemoveRedundantDebugValuesPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); + initializeSelectOptimizePass(Registry); initializeShadowStackGCLoweringPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp index 877aa69c3e58..8f185a161bd0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenCommonISel.cpp @@ -129,7 +129,9 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, MachineBasicBlock::iterator Start = BB->begin(); MachineBasicBlock::iterator Previous = SplitPoint; - --Previous; + do { + --Previous; + } while (Previous != Start && Previous->isDebugInstr()); if (TII.isTailCall(*SplitPoint) && Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { @@ -142,7 +144,7 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, // ADJCALLSTACKUP ... // TAILJMP somewhere // On the other hand, it could be an unrelated call in which case this tail - // call has to register moves of its own and should be the split point. For + // call has no register moves of its own and should be the split point. 
For // example: // ADJCALLSTACKDOWN // CALL something_else @@ -167,3 +169,31 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB, return SplitPoint; } + +unsigned llvm::getInvertedFPClassTest(unsigned Test) { + unsigned InvertedTest = ~Test & fcAllFlags; + switch (InvertedTest) { + default: + break; + case fcNan: + case fcSNan: + case fcQNan: + case fcInf: + case fcPosInf: + case fcNegInf: + case fcNormal: + case fcPosNormal: + case fcNegNormal: + case fcSubnormal: + case fcPosSubnormal: + case fcNegSubnormal: + case fcZero: + case fcPosZero: + case fcNegZero: + case fcFinite: + case fcPosFinite: + case fcNegFinite: + return InvertedTest; + } + return 0; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index c888adeafca5..6778af22f532 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -23,16 +23,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -174,12 +173,11 @@ static cl::opt<bool> DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt<bool> ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt<bool> ProfileUnknownInSpecialSection( - "profile-unknown-in-special-section", cl::Hidden, cl::init(false), - cl::ZeroOrMore, + "profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " @@ -188,6 +186,15 @@ static cl::opt<bool> ProfileUnknownInSpecialSection( "to handle it in a different way than .text section, to save " "RAM for example. ")); +static cl::opt<bool> BBSectionsGuidedSectionPrefix( + "bbsections-guided-section-prefix", cl::Hidden, cl::init(true), + cl::desc("Use the basic-block-sections profile to determine the text " + "section prefix for hot functions. Functions with " + "basic-block-sections profile will be placed in `.text.hot` " + "regardless of their FDO profile info. 
Other functions won't be " + "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " + "profiles.")); + static cl::opt<unsigned> FreqRatioToSkipMerge( "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " @@ -274,6 +281,7 @@ class TypePromotionTransaction; const TargetLowering *TLI = nullptr; const TargetRegisterInfo *TRI; const TargetTransformInfo *TTI = nullptr; + const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr; const TargetLibraryInfo *TLInfo; const LoopInfo *LI; std::unique_ptr<BlockFrequencyInfo> BFI; @@ -349,6 +357,7 @@ class TypePromotionTransaction; AU.addRequired<TargetPassConfig>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<LoopInfoWrapperPass>(); + AU.addUsedIfAvailable<BasicBlockSectionsProfileReader>(); } private: @@ -401,6 +410,8 @@ class TypePromotionTransaction; bool optimizeFunnelShift(IntrinsicInst *Fsh); bool optimizeSelectInst(SelectInst *SI); bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI); + bool optimizeSwitchType(SwitchInst *SI); + bool optimizeSwitchPhiConstants(SwitchInst *SI); bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); @@ -442,6 +453,7 @@ char CodeGenPrepare::ID = 0; INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -473,8 +485,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + BBSectionsProfileReader = + getAnalysisIfAvailable<BasicBlockSectionsProfileReader>(); OptSize = F.hasOptSize(); - if (ProfileGuidedSectionPrefix) { + // Use the basic-block-sections profile to promote hot functions to .text.hot if requested. + if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader && + BBSectionsProfileReader->isFunctionHot(F.getName())) { + F.setSectionPrefix("hot"); + } else if (ProfileGuidedSectionPrefix) { // The hot attribute overwrites profile count based hotness while profile // counts based hotness overwrites the cold attribute. // This is a conservative behavior. @@ -524,7 +542,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Split some critical edges where one of the sources is an indirect branch, // to help generate sane code for PHIs involving such edges. - EverMadeChange |= SplitIndirectBrCriticalEdges(F); + EverMadeChange |= + SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true); bool MadeChange = true; while (MadeChange) { @@ -2037,7 +2056,8 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return false; // Bail if the value is never zero. - if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL)) + Use &Op = CountZeros->getOperandUse(0); + if (isKnownNonZero(Op, *DL)) return false; // The intrinsic will be sunk behind a compare against zero and branch. @@ -2058,7 +2078,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Replace the unconditional branch that was created by the first split with // a compare against zero and a conditional branch.
Value *Zero = Constant::getNullValue(Ty); - Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + // Avoid introducing branch on poison. This also replaces the ctz operand. + if (!isGuaranteedNotToBeUndefOrPoison(Op)) + Op = Builder.CreateFreeze(Op, Op->getName() + ".fr"); + Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz"); Builder.CreateCondBr(Cmp, EndBlock, CallBlock); StartBlock->getTerminator()->eraseFromParent(); @@ -2101,7 +2124,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // Align the pointer arguments to this call if the target thinks it's a good // idea - unsigned MinSize, PrefAlign; + unsigned MinSize; + Align PrefAlign; if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { for (auto &Arg : CI->args()) { // We want to align both objects whose address is used directly and @@ -2115,12 +2139,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { 0); Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); uint64_t Offset2 = Offset.getLimitedValue(); - if ((Offset2 & (PrefAlign-1)) != 0) + if (!isAligned(PrefAlign, Offset2)) continue; AllocaInst *AI; - if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign && DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) - AI->setAlignment(Align(PrefAlign)); + AI->setAlignment(PrefAlign); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is @@ -2130,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { GV->getPointerAlignment(*DL) < PrefAlign && DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) - GV->setAlignment(MaybeAlign(PrefAlign)); + GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment @@ -3371,7 +3395,7 @@ public: if (!Visited.insert(P).second) continue; if (auto *PI = dyn_cast<Instruction>(P)) - if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) { + if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) { for (auto *U : PI->users()) WorkList.push_back(cast<Value>(U)); Put(PI, V); @@ -3416,7 +3440,7 @@ public: void destroyNewNodes(Type *CommonType) { // For safe erasing, replace the uses with dummy value first. - auto *Dummy = UndefValue::get(CommonType); + auto *Dummy = PoisonValue::get(CommonType); for (auto *I : AllPhiNodes) { I->replaceAllUsesWith(Dummy); I->eraseFromParent(); @@ -3785,7 +3809,7 @@ private: SmallVector<Value *, 32> Worklist; assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) && "Address must be a Phi or Select node"); - auto *Dummy = UndefValue::get(CommonType); + auto *Dummy = PoisonValue::get(CommonType); Worklist.push_back(Original); while (!Worklist.empty()) { Value *Current = Worklist.pop_back_val(); @@ -4550,9 +4574,9 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); if (!RHS || RHS->getBitWidth() > 64) return false; - int64_t Scale = RHS->getSExtValue(); - if (Opcode == Instruction::Shl) - Scale = 1LL << Scale; + int64_t Scale = Opcode == Instruction::Shl + ? 
1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1) + : RHS->getSExtValue(); return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); } @@ -4783,7 +4807,6 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { } // It isn't profitable to do this, roll back. - //cerr << "NOT FOLDING: " << *I; AddrMode = BackupAddrMode; AddrModeInsts.resize(OldSize); TPT.rollback(LastKnownGood); @@ -4836,7 +4859,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, TLI.ComputeConstraintToUse(OpInfo, SDValue()); // If this asm operand is our Value*, and if it isn't an indirect memory - // operand, we can't fold it! + // operand, we can't fold it! TODO: Also handle C_Address? if (OpInfo.CallOperandVal == OpVal && (OpInfo.ConstraintType != TargetLowering::C_Memory || !OpInfo.isIndirect)) @@ -5158,8 +5181,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // GEP, collect the GEP. Skip the GEPs that are the new bases of // previously split data structures. LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); - if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) - LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size(); + LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size())); } NewAddrMode.OriginalValue = V; @@ -5323,11 +5345,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - ResultPtr = - AddrMode.InBounds - ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, - "sunkaddr") - : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, + "sunkaddr", AddrMode.InBounds); } ResultIndex = V; @@ -5338,11 +5357,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); - SunkAddr = - AddrMode.InBounds - ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, - "sunkaddr") - : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr", + AddrMode.InBounds); } if (SunkAddr->getType() != Addr->getType()) @@ -5619,6 +5635,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { // Compute the constraint code and ConstraintType to use. TLI->ComputeConstraintToUse(OpInfo, SDValue()); + // TODO: Also handle C_Address? 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
       Value *OpVal = CS->getArgOperand(ArgNo++);
@@ -6002,31 +6019,25 @@ bool CodeGenPrepare::optimizePhiType(
   for (Value *V : Phi->incoming_values()) {
     if (auto *OpPhi = dyn_cast<PHINode>(V)) {
       if (!PhiNodes.count(OpPhi)) {
-        if (Visited.count(OpPhi))
+        if (!Visited.insert(OpPhi).second)
           return false;
         PhiNodes.insert(OpPhi);
-        Visited.insert(OpPhi);
         Worklist.push_back(OpPhi);
       }
     } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
       if (!OpLoad->isSimple())
         return false;
-      if (!Defs.count(OpLoad)) {
-        Defs.insert(OpLoad);
+      if (Defs.insert(OpLoad).second)
         Worklist.push_back(OpLoad);
-      }
     } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
-      if (!Defs.count(OpEx)) {
-        Defs.insert(OpEx);
+      if (Defs.insert(OpEx).second)
         Worklist.push_back(OpEx);
-      }
     } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
       if (!ConvertTy)
         ConvertTy = OpBC->getOperand(0)->getType();
       if (OpBC->getOperand(0)->getType() != ConvertTy)
         return false;
-      if (!Defs.count(OpBC)) {
-        Defs.insert(OpBC);
+      if (Defs.insert(OpBC).second) {
         Worklist.push_back(OpBC);
         AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
                        !isa<ExtractElementInst>(OpBC->getOperand(0));
@@ -6127,7 +6138,7 @@ bool CodeGenPrepare::optimizePhiTypes(Function &F) {

   // Remove any old phi's that have been converted.
   for (auto *I : DeletedInstrs) {
-    I->replaceAllUsesWith(UndefValue::get(I->getType()));
+    I->replaceAllUsesWith(PoisonValue::get(I->getType()));
     I->eraseFromParent();
   }
@@ -6979,12 +6990,12 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
   return Changed;
 }

-bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
   Value *Cond = SI->getCondition();
   Type *OldType = Cond->getType();
   LLVMContext &Context = Cond->getContext();
   EVT OldVT = TLI->getValueType(*DL, OldType);
-  MVT RegType = TLI->getRegisterType(Context, OldVT);
+  MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
   unsigned RegWidth = RegType.getSizeInBits();

   if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
@@ -7019,7 +7030,7 @@ bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
   ExtInst->setDebugLoc(SI->getDebugLoc());
   SI->setCondition(ExtInst);
   for (auto Case : SI->cases()) {
-    APInt NarrowConst = Case.getCaseValue()->getValue();
+    const APInt &NarrowConst = Case.getCaseValue()->getValue();
     APInt WideConst = (ExtType == Instruction::ZExt)
                           ? NarrowConst.zext(RegWidth)
                           : NarrowConst.sext(RegWidth);
     Case.setValue(ConstantInt::get(Context, WideConst));
@@ -7028,6 +7039,89 @@ bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
   return true;
 }

+bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
+  // The SCCP optimization tends to produce code like this:
+  //   switch(x) { case 42: phi(42, ...) }
+  // Materializing the constant for the phi argument needs instructions, so we
+  // change the code to:
+  //   switch(x) { case 42: phi(x, ...) }
+
+  Value *Condition = SI->getCondition();
+  // Avoid endless loop in degenerate case.
+  if (isa<ConstantInt>(*Condition))
+    return false;
+
+  bool Changed = false;
+  BasicBlock *SwitchBB = SI->getParent();
+  Type *ConditionType = Condition->getType();
+
+  for (const SwitchInst::CaseHandle &Case : SI->cases()) {
+    ConstantInt *CaseValue = Case.getCaseValue();
+    BasicBlock *CaseBB = Case.getCaseSuccessor();
+    // Set to true if we previously checked that `CaseBB` is only reached by
+    // a single case from this switch.
+    bool CheckedForSinglePred = false;
+    for (PHINode &PHI : CaseBB->phis()) {
+      Type *PHIType = PHI.getType();
+      // If ZExt is free then we can also catch patterns like this:
+      //   switch((i32)x) { case 42: phi((i64)42, ...); }
+      // and replace `(i64)42` with `zext i32 %x to i64`.
+      bool TryZExt =
+          PHIType->isIntegerTy() &&
+          PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
+          TLI->isZExtFree(ConditionType, PHIType);
+      if (PHIType == ConditionType || TryZExt) {
+        // Set to true to skip this case because of multiple preds.
+        bool SkipCase = false;
+        Value *Replacement = nullptr;
+        for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
+          Value *PHIValue = PHI.getIncomingValue(I);
+          if (PHIValue != CaseValue) {
+            if (!TryZExt)
+              continue;
+            ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
+            if (!PHIValueInt ||
+                PHIValueInt->getValue() !=
+                    CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
+              continue;
+          }
+          if (PHI.getIncomingBlock(I) != SwitchBB)
+            continue;
+          // We cannot optimize if there are multiple case labels jumping to
+          // this block. This check may get expensive when there are many
+          // case labels so we test for it last.
+          if (!CheckedForSinglePred) {
+            CheckedForSinglePred = true;
+            if (SI->findCaseDest(CaseBB) == nullptr) {
+              SkipCase = true;
+              break;
+            }
+          }
+
+          if (Replacement == nullptr) {
+            if (PHIValue == CaseValue) {
+              Replacement = Condition;
+            } else {
+              IRBuilder<> Builder(SI);
+              Replacement = Builder.CreateZExt(Condition, PHIType);
+            }
+          }
+          PHI.setIncomingValue(I, Replacement);
+          Changed = true;
+        }
+        if (SkipCase)
+          break;
+      }
+    }
+  }
+  return Changed;
+}
+
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+  bool Changed = optimizeSwitchType(SI);
+  Changed |= optimizeSwitchPhiConstants(SI);
+  return Changed;
+}

 namespace {
@@ -7777,7 +7871,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up. If we detect such a
     // trivial PHI, go ahead and zap it here.
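// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The switch/phi rewrite above at source level: both functions compute the
// same result, but the second reuses the condition register instead of
// rematerializing the constant 42 in the case block (hypothetical example).
int beforeRewrite(int X) {
  switch (X) { case 42: return 42; default: return 0; }
}
int afterRewrite(int X) {
  switch (X) { case 42: return X; default: return 0; } // X is known to be 42
}
// ---------------------------------------------------------------------------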
-    if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+    if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
       LargeOffsetGEPMap.erase(P);
       P->replaceAllUsesWith(V);
       P->eraseFromParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
index 1d50e1d22b95..fd52191882cb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CommandFlags.cpp
@@ -13,7 +13,12 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
@@ -58,6 +63,7 @@ CGOPT(bool, EnableUnsafeFPMath)
 CGOPT(bool, EnableNoInfsFPMath)
 CGOPT(bool, EnableNoNaNsFPMath)
 CGOPT(bool, EnableNoSignedZerosFPMath)
+CGOPT(bool, EnableApproxFuncFPMath)
 CGOPT(bool, EnableNoTrappingFPMath)
 CGOPT(bool, EnableAIXExtendedAltivecABI)
 CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath)
@@ -73,6 +79,7 @@ CGOPT(bool, StackSymbolOrdering)
 CGOPT(bool, StackRealign)
 CGOPT(std::string, TrapFuncName)
 CGOPT(bool, UseCtors)
+CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
 CGOPT(bool, RelaxELFRelocations)
 CGOPT_EXP(bool, DataSections)
 CGOPT_EXP(bool, FunctionSections)
@@ -94,6 +101,7 @@ CGOPT(bool, ForceDwarfFrameSection)
 CGOPT(bool, XRayOmitFunctionIndex)
 CGOPT(bool, DebugStrictDwarf)
 CGOPT(unsigned, AlignLoops)
+CGOPT(bool, JMCInstrument)

 codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
 #define CGBINDOPT(NAME)                                                        \
@@ -218,6 +226,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(EnableNoSignedZerosFPMath);

+  static cl::opt<bool> EnableApproxFuncFPMath(
+      "enable-approx-func-fp-math",
+      cl::desc("Enable FP math optimizations that assume approx func"),
+      cl::init(false));
+  CGBINDOPT(EnableApproxFuncFPMath);
+
   static cl::opt<bool> EnableNoTrappingFPMath(
       "enable-no-trapping-fp-math",
       cl::desc("Enable setting the FP exceptions build "
@@ -333,6 +347,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(UseCtors);

+  static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
+      "lower-global-dtors-via-cxa-atexit",
+      cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
+      cl::init(true));
+  CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
+
   static cl::opt<bool> RelaxELFRelocations(
       "relax-elf-relocations",
       cl::desc(
@@ -457,6 +477,12 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::desc("Default alignment for loops"));
   CGBINDOPT(AlignLoops);

+  static cl::opt<bool> JMCInstrument(
+      "enable-jmc-instrument",
+      cl::desc("Instrument functions with a call to __CheckForDebuggerJustMyCode"),
+      cl::init(false));
+  CGBINDOPT(JMCInstrument);
+
 #undef CGBINDOPT

   mc::RegisterMCTargetOptionsFlags();
@@ -493,6 +519,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.NoInfsFPMath = getEnableNoInfsFPMath();
   Options.NoNaNsFPMath = getEnableNoNaNsFPMath();
   Options.NoSignedZerosFPMath = getEnableNoSignedZerosFPMath();
+  Options.ApproxFuncFPMath = getEnableApproxFuncFPMath();
   Options.NoTrappingFPMath = getEnableNoTrappingFPMath();

   DenormalMode::DenormalModeKind DenormKind = getDenormalFPMath();
@@ -509,9 +536,10 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
   Options.StackSymbolOrdering = getStackSymbolOrdering();
   Options.UseInitArray = !getUseCtors();
+  Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
   Options.RelaxELFRelocations = getRelaxELFRelocations();
   Options.DataSections =
-      getExplicitDataSections().getValueOr(TheTriple.hasDefaultDataSections());
+      getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
   Options.FunctionSections = getFunctionSections();
   Options.IgnoreXCOFFVisibility = getIgnoreXCOFFVisibility();
   Options.XCOFFTracebackTable = getXCOFFTracebackTable();
@@ -531,6 +559,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
   Options.DebugStrictDwarf = getDebugStrictDwarf();
   Options.LoopAlignment = getAlignLoops();
+  Options.JMCInstrument = getJMCInstrument();

   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
@@ -643,6 +672,7 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
   HANDLE_BOOL_ATTR(EnableNoInfsFPMathView, "no-infs-fp-math");
   HANDLE_BOOL_ATTR(EnableNoNaNsFPMathView, "no-nans-fp-math");
   HANDLE_BOOL_ATTR(EnableNoSignedZerosFPMathView, "no-signed-zeros-fp-math");
+  HANDLE_BOOL_ATTR(EnableApproxFuncFPMathView, "approx-func-fp-math");

   if (DenormalFPMathView->getNumOccurrences() > 0 &&
       !F.hasFnAttribute("denormal-fp-math")) {
@@ -684,4 +714,3 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
   for (Function &F : M)
     setFunctionAttributes(CPU, Features, F);
 }
-
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
index d38bacdb1aa7..42192f41dbda 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -30,10 +30,10 @@
 #include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 5579152f1ce0..ce00be634e9a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -14,7 +14,6 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1337e57f360b..565c8b405f82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -28,12 +28,9 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
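// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The getValueOr -> value_or change above is a mechanical migration to the
// std::optional spelling. The tri-state pattern it serves: an unset option
// means "use the triple's default", a set one records the user's explicit
// choice. Hypothetical helper with the same shape as the DataSections hunk:
#include <optional>
bool resolveDataSections(std::optional<bool> Explicit, bool TripleDefault) {
  return Explicit.value_or(TripleDefault);
}
// ---------------------------------------------------------------------------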
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include <deque>
@@ -93,7 +90,7 @@ private:
   LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
                                 const MachineOperand &MO) const;

-  bool runOnce(MachineFunction &MF);
+  std::pair<bool, bool> runOnce(MachineFunction &MF);

   LaneBitmask determineInitialDefinedLanes(unsigned Reg);
   LaneBitmask determineInitialUsedLanes(unsigned Reg);
@@ -487,7 +484,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
   return true;
 }

-bool DetectDeadLanes::runOnce(MachineFunction &MF) {
+std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
   // First pass: Populate defs/uses of vregs with initial values
   unsigned NumVirtRegs = MRI->getNumVirtRegs();
   for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -528,6 +525,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
     dbgs() << "\n";
   });

+  bool Changed = false;
   bool Again = false;
   // Mark operands as dead/unused.
   for (MachineBasicBlock &MBB : MF) {
@@ -544,6 +542,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
           LLVM_DEBUG(dbgs()
                      << "Marking operand '" << MO << "' as dead in " << MI);
           MO.setIsDead();
+          Changed = true;
         }
         if (MO.readsReg()) {
           bool CrossCopy = false;
@@ -551,10 +550,12 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
             LLVM_DEBUG(dbgs()
                        << "Marking operand '" << MO << "' as undef in " << MI);
             MO.setIsUndef();
+            Changed = true;
           } else if (isUndefInput(MO, &CrossCopy)) {
             LLVM_DEBUG(dbgs()
                        << "Marking operand '" << MO << "' as undef in " << MI);
             MO.setIsUndef();
+            Changed = true;
             if (CrossCopy)
               Again = true;
           }
@@ -563,7 +564,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
     }
   }

-  return Again;
+  return std::make_pair(Changed, Again);
 }

 bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
@@ -585,13 +586,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
   WorklistMembers.resize(NumVirtRegs);
   DefinedByCopy.resize(NumVirtRegs);

+  bool Changed = false;
   bool Again;
   do {
-    Again = runOnce(MF);
+    bool LocalChanged;
+    std::tie(LocalChanged, Again) = runOnce(MF);
+    Changed |= LocalChanged;
   } while(Again);

   DefinedByCopy.clear();
   WorklistMembers.clear();
   delete[] VRegInfos;
-  return true;
+  return Changed;
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
index c18532946bf9..b26aa792bb93 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EHContGuardCatchret.cpp
@@ -17,9 +17,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/InitializePasses.h"

diff --git a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 6a0da4dad3c1..32858d043383 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,10 +17,10 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -30,7 +30,6 @@
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -664,8 +663,8 @@ void SSAIfConv::rewritePHIOperands() {
       PI.PHI->getOperand(i-1).setMBB(Head);
       PI.PHI->getOperand(i-2).setReg(DstReg);
     } else if (MBB == getFPred()) {
-      PI.PHI->RemoveOperand(i-1);
-      PI.PHI->RemoveOperand(i-2);
+      PI.PHI->removeOperand(i-1);
+      PI.PHI->removeOperand(i-2);
     }
   }
   LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 60ee1812ee2c..b2639636dda7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -32,6 +31,10 @@

 using namespace llvm;

+namespace llvm {
+class TargetLowering;
+}
+
 #define DEBUG_TYPE "expandmemcmp"

 STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
@@ -737,7 +740,7 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
                          const TargetLowering *TLI, const DataLayout *DL,
                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
-                         DomTreeUpdater *DTU) {
+                         DomTreeUpdater *DTU, const bool IsBCmp) {
   NumMemCmpCalls++;

   // Early exit from expansion if -Oz.
@@ -757,7 +760,8 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   }

   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
-  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const bool IsUsedForZeroCmp =
+      IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
   bool OptForSize = CI->getFunction()->hasOptSize() ||
                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
   auto Options = TTI->enableMemCmpExpansion(OptForSize,
@@ -861,7 +865,7 @@ bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
     LibFunc Func;
     if (TLI->getLibFunc(*CI, Func) &&
         (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
-        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) {
+        expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
       return true;
     }
   }
@@ -881,7 +885,7 @@ ExpandMemCmpPass::runImpl(Function &F, const TargetLibraryInfo *TLI,
   bool MadeChanges = false;
   for (auto BBIt = F.begin(); BBIt != F.end();) {
     if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI,
-                   DTU.hasValue() ? DTU.getPointer() : nullptr)) {
+                   DTU ? DTU.getPointer() : nullptr)) {
       MadeChanges = true;
       // If changes were made, restart the function from the beginning, since
       // the structure of the function was changed.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d9caa8ad42d0..086b4a4dcc47 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -13,8 +13,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -104,8 +102,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {

   if (MI->allDefsAreDead()) {
     MI->setDesc(TII->get(TargetOpcode::KILL));
-    MI->RemoveOperand(3); // SubIdx
-    MI->RemoveOperand(1); // Imm
+    MI->removeOperand(3); // SubIdx
+    MI->removeOperand(1); // Imm
     LLVM_DEBUG(dbgs() << "subreg: replaced by: " << *MI);
     return true;
   }
@@ -117,8 +115,8 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
     // We must leave %rax live.
     if (DstReg != InsReg) {
       MI->setDesc(TII->get(TargetOpcode::KILL));
-      MI->RemoveOperand(3); // SubIdx
-      MI->RemoveOperand(1); // Imm
+      MI->removeOperand(3); // SubIdx
+      MI->removeOperand(1); // Imm
       LLVM_DEBUG(dbgs() << "subreg: replace by: " << *MI);
       return true;
     }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
index 2bcaf750911b..f08c47d220ea 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -14,12 +14,10 @@
 #include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index bb8d2b3e9a78..7883a48d121c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -23,13 +23,11 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"

 using namespace llvm;
@@ -115,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
   OldOp.eraseFromParent();
 }

+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+  // The result of VP reductions depends on the mask and evl.
+  if (isa<VPReductionIntrinsic>(VPI))
+    return false;
+  // Fall back to whether the intrinsic is speculatable.
+  Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode();
+  unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call);
+  return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc,
+                                                cast<Operator>(&VPI));
+}
+
 //// } Helpers

 namespace {
@@ -218,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
 Value *
 CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");

   auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -298,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
 Value *
 CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                 VPReductionIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");

   Value *Mask = VPI.getMaskParam();
@@ -473,9 +480,9 @@ struct TransformJob {
   bool isDone() const { return Strategy.shouldDoNothing(); }
 };

-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
-  // Speculatable instructions do not strictly need predication.
-  if (isSafeToSpeculativelyExecute(&I)) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
+  // Operations with speculatable lanes do not strictly need predication.
+  if (maySpeculateLanes(VPI)) {
     // Converting a speculatable VP intrinsic means dropping %mask and %evl.
     // No need to expand %evl into the %mask only to ignore that code.
     if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -520,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
     if (!VPI)
       continue;
     auto VPStrat = getVPLegalizationStrategy(*VPI);
-    sanitizeStrategy(I, VPStrat);
+    sanitizeStrategy(*VPI, VPStrat);
     if (!VPStrat.shouldDoNothing())
       Worklist.emplace_back(VPI, VPStrat);
   }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
index c2194929e2e7..68304dd41db0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -13,12 +13,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"

 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
index 1d35b194f218..3ec666227651 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FaultMaps.cpp
@@ -52,7 +52,7 @@ void FaultMaps::serializeToFaultMapSection() {
   // Create the section.
   MCSection *FaultMapSection =
       OutContext.getObjectFileInfo()->getFaultMapSection();
-  OS.SwitchSection(FaultMapSection);
+  OS.switchSection(FaultMapSection);

   // Emit a dummy symbol to force section inclusion.
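// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// Why maySpeculateLanes() above refuses VP reductions: their result depends on
// which lanes %mask/%evl enable, whereas a lane-wise op like vp.add merely
// produces ignored values in disabled lanes. Scalar model (names illustrative):
#include <cstddef>
int vpReduceAdd(const int *X, const bool *M, size_t EVL) {
  int Acc = 0;
  for (size_t I = 0; I < EVL; ++I) // lanes at or beyond EVL never contribute
    if (M[I])                      // masked-off lanes never contribute
      Acc += X[I];
  return Acc; // the value changes with mask/EVL, so lanes cannot be speculated
}
// ---------------------------------------------------------------------------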
   OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps")));
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
index 00040e92a829..329c9587e321 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -16,11 +16,9 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"

 using namespace llvm;

 #define DEBUG_TYPE "finalize-isel"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index ec6bf18b2769..252910fd9462 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -24,10 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/StackMaps.h"
-#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/InitializePasses.h"
@@ -156,12 +153,17 @@ static Register performCopyPropagation(Register Reg,
   RI = ++MachineBasicBlock::iterator(Def);
   IsKill = DestSrc->Source->isKill();

-  // There are no uses of original register between COPY and STATEPOINT.
-  // There can't be any after STATEPOINT, so we can eliminate Def.
   if (!Use) {
+    // There are no uses of original register between COPY and STATEPOINT.
+    // There can't be any after STATEPOINT, so we can eliminate Def.
     LLVM_DEBUG(dbgs() << "spillRegisters: removing dead copy " << *Def);
     Def->eraseFromParent();
+  } else if (IsKill) {
+    // COPY will remain in place, spill will be inserted *after* it, so it is
+    // not a kill of source anymore.
+    const_cast<MachineOperand *>(DestSrc->Source)->setIsKill(false);
   }
+
   return SrcReg;
 }
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
index af5515cc6bfd..4d27143c5298 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCMetadata.cpp
@@ -11,16 +11,13 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <memory>
 #include <string>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
index 637a877810a1..80feb0045406 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -14,7 +14,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
@@ -24,9 +23,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"

 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index f9bfe8518083..ac140e745600 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -67,7 +67,8 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
 }

 bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
-  return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_IMPLICIT_DEF;
+  return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT ||
+         Opc == TargetOpcode::G_IMPLICIT_DEF;
 }

 std::unique_ptr<CSEConfigBase>
@@ -88,7 +89,7 @@ void GISelCSEInfo::setMF(MachineFunction &MF) {
   this->MRI = &MF.getRegInfo();
 }

-GISelCSEInfo::~GISelCSEInfo() {}
+GISelCSEInfo::~GISelCSEInfo() = default;

 bool GISelCSEInfo::isUniqueMachineInstValid(
     const UniqueMachineInstr &UMI) const {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 1a642e233a6a..a432e4ed7fb7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -12,6 +12,7 @@
 //

 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -174,6 +175,7 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
   default:
     break;
   case TargetOpcode::G_ADD:
+  case TargetOpcode::G_PTR_ADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_ASHR:
   case TargetOpcode::G_LSHR:
@@ -185,23 +187,54 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
   case TargetOpcode::G_UDIV:
   case TargetOpcode::G_SDIV:
   case TargetOpcode::G_UREM:
-  case TargetOpcode::G_SREM: {
+  case TargetOpcode::G_SREM:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UMAX: {
     // Try to constant fold these.
     assert(SrcOps.size() == 2 && "Invalid sources");
     assert(DstOps.size() == 1 && "Invalid dsts");
-    if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+    LLT SrcTy = SrcOps[0].getLLTTy(*getMRI());
+
+    if (Opc == TargetOpcode::G_PTR_ADD &&
+        getDataLayout().isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+      break;
+
+    if (SrcTy.isVector()) {
       // Try to constant fold vector constants.
-      Register VecCst = ConstantFoldVectorBinop(
-          Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
-      if (VecCst)
-        return buildCopy(DstOps[0], VecCst);
+      SmallVector<APInt> VecCst = ConstantFoldVectorBinop(
+          Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI());
+      if (!VecCst.empty())
+        return buildBuildVectorConstant(DstOps[0], VecCst);
       break;
     }
+
     if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
                                                 SrcOps[1].getReg(), *getMRI()))
       return buildConstant(DstOps[0], *Cst);
     break;
   }
+  case TargetOpcode::G_FADD:
+  case TargetOpcode::G_FSUB:
+  case TargetOpcode::G_FMUL:
+  case TargetOpcode::G_FDIV:
+  case TargetOpcode::G_FREM:
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+  case TargetOpcode::G_FMINNUM_IEEE:
+  case TargetOpcode::G_FMAXNUM_IEEE:
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMAXIMUM:
+  case TargetOpcode::G_FCOPYSIGN: {
+    // Try to constant fold these.
+    assert(SrcOps.size() == 2 && "Invalid sources");
+    assert(DstOps.size() == 1 && "Invalid dsts");
+    if (Optional<APFloat> Cst = ConstantFoldFPBinOp(
+            Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI()))
+      return buildFConstant(DstOps[0], *Cst);
+    break;
+  }
   case TargetOpcode::G_SEXT_INREG: {
     assert(DstOps.size() == 1 && "Invalid dst ops");
     assert(SrcOps.size() == 2 && "Invalid src ops");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1ec7868f2234..081c8b125f17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,16 +11,16 @@
 ///
 //===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
@@ -698,10 +698,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
                             ValTy, extendOpFromFlags(Args[i].Flags[0]));
     }

+    bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
     for (unsigned Part = 0; Part < NumParts; ++Part) {
       Register ArgReg = Args[i].Regs[Part];
       // There should be Regs.size() ArgLocs per argument.
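// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The new G_F* cases in CSEMIRBuilder follow the usual builder-side folding
// contract: if every source operand is a constant, build the folded constant
// instead of the instruction. Schematic version with doubles standing in for
// APFloat (placeholder opcode characters, not the real TargetOpcode values):
#include <optional>
std::optional<double> foldFPBinOp(char Opc, std::optional<double> L,
                                  std::optional<double> R) {
  if (!L || !R)
    return std::nullopt; // not both constant: caller emits the real operation
  switch (Opc) {
  case '+': return *L + *R; // G_FADD
  case '-': return *L - *R; // G_FSUB
  case '*': return *L * *R; // G_FMUL
  case '/': return *L / *R; // G_FDIV
  default:  return std::nullopt;
  }
}
// ---------------------------------------------------------------------------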
-      VA = ArgLocs[j + Part];
+      unsigned Idx = BigEndianPartOrdering ? NumParts - 1 - Part : Part;
+      CCValAssign &VA = ArgLocs[j + Idx];
       const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];

       if (VA.isMemLoc() && !Flags.isByVal()) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 30f8838805b5..1a5fe3e84c17 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,14 +13,13 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
-#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"

 #define DEBUG_TYPE "gi-combiner"
@@ -57,8 +56,7 @@ class WorkListMaintainer : public GISelChangeObserver {
 public:
   WorkListMaintainer(WorkListTy &WorkList) : WorkList(WorkList) {}
-  virtual ~WorkListMaintainer() {
-  }
+  virtual ~WorkListMaintainer() = default;

   void erasingInstr(MachineInstr &MI) override {
     LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
@@ -115,7 +113,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
   bool MFChanged = false;
   bool Changed;
-  MachineIRBuilder &B = *Builder.get();
+  MachineIRBuilder &B = *Builder;

   do {
     // Collect all instructions. Do a post order traversal for basic blocks and
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d6a009744161..2c94f87804ac 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,7 +8,6 @@
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -16,23 +15,22 @@
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/DivisionByConstantInfo.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
 #include <tuple>

 #define DEBUG_TYPE "gi-combiner"
@@ -131,9 +129,27 @@ isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
   return BigEndian;
 }

+bool CombinerHelper::isPreLegalize() const { return !LI; }
+
+bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
+  assert(LI && "Must have LegalizerInfo to query isLegal!");
+  return LI->getAction(Query).Action == LegalizeActions::Legal;
+}
+
 bool CombinerHelper::isLegalOrBeforeLegalizer(
     const LegalityQuery &Query) const {
-  return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
+  return isPreLegalize() || isLegal(Query);
+}
+
+bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
+  if (!Ty.isVector())
+    return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
+  // Vector constants are represented as a G_BUILD_VECTOR of scalar
+  // G_CONSTANTs.
+  if (isPreLegalize())
+    return true;
+  LLT EltTy = Ty.getElementType();
+  return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
+         isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
 }

 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
@@ -1275,12 +1291,12 @@ bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
   Register SrcReg = MI.getOperand(1).getReg();
   LLT DstTy = MRI.getType(DstReg);
   Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
-  return Cst.hasValue();
+  return Cst.has_value();
 }

 void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                      Optional<APFloat> &Cst) {
-  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
+  assert(Cst && "Optional is unexpectedly empty!");
   Builder.setInstrAndDebugLoc(MI);
   MachineFunction &MF = Builder.getMF();
   auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
@@ -2350,6 +2366,19 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
     return false;

+  // If both instructions are loads or stores, they are equal only if both
+  // are dereferenceable invariant loads with the same number of bits.
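// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The BigEndianPartOrdering index flip in the CallLowering hunk above: when an
// i64 argument is split into parts, a big-endian ABI places the high half in
// the first location, so value part I maps to location NumParts-1-I.
// Self-contained demonstration of the index arithmetic:
#include <cstdio>
int main() {
  const unsigned NumParts = 2; // e.g. i64 split into two i32 parts
  for (unsigned Part = 0; Part != NumParts; ++Part)
    std::printf("value part %u -> ArgLoc %u (big-endian)\n", Part,
                NumParts - 1 - Part);
}
// ---------------------------------------------------------------------------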
+  if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
+    GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
+    GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
+    if (!LS1 || !LS2)
+      return false;
+
+    if (!I2->isDereferenceableInvariantLoad(nullptr) ||
+        (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
+      return false;
+  }
+
   // Check for physical registers on the instructions first to avoid cases
   // like this:
   //
@@ -2397,7 +2426,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
     return false;
   auto *MI = MRI.getVRegDef(MOP.getReg());
   auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
-  return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+  return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
          MaybeCst->getSExtValue() == C;
 }
@@ -2916,7 +2945,7 @@ bool CombinerHelper::matchNotCmp(MachineInstr &MI,
   int64_t Cst;
   if (Ty.isVector()) {
     MachineInstr *CstDef = MRI.getVRegDef(CstReg);
-    auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
+    auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
     if (!MaybeCst)
       return false;
     if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
@@ -3049,6 +3078,102 @@ void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
   MI.eraseFromParent();
 }

+bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
+                                              unsigned &SelectOpNo) {
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  Register OtherOperandReg = RHS;
+  SelectOpNo = 1;
+  MachineInstr *Select = MRI.getVRegDef(LHS);
+
+  // Don't do this unless the old select is going away. We want to eliminate
+  // the binary operator, not replace a binop with a select.
+  if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+      !MRI.hasOneNonDBGUse(LHS)) {
+    OtherOperandReg = LHS;
+    SelectOpNo = 2;
+    Select = MRI.getVRegDef(RHS);
+    if (Select->getOpcode() != TargetOpcode::G_SELECT ||
+        !MRI.hasOneNonDBGUse(RHS))
+      return false;
+  }
+
+  MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
+  MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
+
+  if (!isConstantOrConstantVector(*SelectLHS, MRI,
+                                  /*AllowFP*/ true,
+                                  /*AllowOpaqueConstants*/ false))
+    return false;
+  if (!isConstantOrConstantVector(*SelectRHS, MRI,
+                                  /*AllowFP*/ true,
+                                  /*AllowOpaqueConstants*/ false))
+    return false;
+
+  unsigned BinOpcode = MI.getOpcode();
+
+  // We now know one of the operands is a select of constants. Now verify that
+  // the other binary operator operand is either a constant, or we can handle
+  // a variable.
+  bool CanFoldNonConst =
+      (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
+      (isNullOrNullSplat(*SelectLHS, MRI) ||
+       isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
+      (isNullOrNullSplat(*SelectRHS, MRI) ||
+       isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
+  if (CanFoldNonConst)
+    return true;
+
+  return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
+                                    /*AllowFP*/ true,
+                                    /*AllowOpaqueConstants*/ false);
+}
+
+/// \p SelectOperand is the operand in binary operator \p MI that is the select
+/// to fold.
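// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The identity behind matchFoldBinOpIntoSelect, with concrete numbers:
//   binop (select Cond, CT, CF), C == select Cond, binop(CT, C), binop(CF, C)
int beforeFold(bool Cond) { return (Cond ? 10 : 20) + 5; }
int afterFold(bool Cond) { return Cond ? 15 : 25; } // add folded into both arms
// ---------------------------------------------------------------------------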
+bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+                                              const unsigned &SelectOperand) {
+  Builder.setInstrAndDebugLoc(MI);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
+
+  Register SelectCond = Select->getOperand(1).getReg();
+  Register SelectTrue = Select->getOperand(2).getReg();
+  Register SelectFalse = Select->getOperand(3).getReg();
+
+  LLT Ty = MRI.getType(Dst);
+  unsigned BinOpcode = MI.getOpcode();
+
+  Register FoldTrue, FoldFalse;
+
+  // We have a select-of-constants followed by a binary operator with a
+  // constant. Eliminate the binop by pulling the constant math into the
+  // select. Example:
+  //   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
+  if (SelectOperand == 1) {
+    // TODO: SelectionDAG verifies this actually constant folds before
+    // committing to the combine.
+
+    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
+    FoldFalse =
+        Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
+  } else {
+    FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
+    FoldFalse =
+        Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
+  }
+
+  Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
+  Observer.erasingInstr(*Select);
+  Select->eraseFromParent();
+  MI.eraseFromParent();
+
+  return true;
+}
+
 Optional<SmallVector<Register, 8>>
 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
   assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
@@ -3340,7 +3465,7 @@ bool CombinerHelper::matchLoadOrCombine(
   // BSWAP.
   bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
   Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
-  if (!IsBigEndian.hasValue())
+  if (!IsBigEndian)
     return false;
   bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
   if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
@@ -3848,7 +3973,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
     auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
     if (!Cst)
       return false;
-    unsigned Idx = Cst.getValue().getZExtValue();
+    unsigned Idx = Cst->getZExtValue();
     if (Idx >= NumElts)
       return false; // Out of range.
     ExtractedElts.set(Idx);
@@ -3904,10 +4029,9 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,

   // Given constants C0 and C1 such that C0 + C1 is bit-width:
   // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
-  // TODO: Match constant splat.
   int64_t CstShlAmt, CstLShrAmt;
-  if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
-      mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+  if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
+      mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
       CstShlAmt + CstLShrAmt == BitWidth) {
     FshOpc = TargetOpcode::G_FSHR;
     Amt = LShrAmt;
@@ -3958,7 +4082,7 @@ void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
   Observer.changingInstr(MI);
   MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
                                          : TargetOpcode::G_ROTR));
-  MI.RemoveOperand(2);
+  MI.removeOperand(2);
   Observer.changedInstr(MI);
 }
@@ -4100,18 +4224,23 @@ bool CombinerHelper::matchAndOrDisjointMask(
     return false;

   Register Src;
-  int64_t MaskAnd;
-  int64_t MaskOr;
+  Register AndMaskReg;
+  int64_t AndMaskBits;
+  int64_t OrMaskBits;
   if (!mi_match(MI, MRI,
-                m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+                m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
+                       m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
     return false;

-  // Check if MaskOr could turn on any bits in Src.
-  if (MaskAnd & MaskOr)
+  // Check if OrMask could turn on any bits in Src.
+  if (AndMaskBits & OrMaskBits)
     return false;

   MatchInfo = [=, &MI](MachineIRBuilder &B) {
     Observer.changingInstr(MI);
+    // Canonicalize the result to have the constant on the RHS.
+    if (MI.getOperand(1).getReg() == AndMaskReg)
+      MI.getOperand(2).setReg(AndMaskReg);
     MI.getOperand(1).setReg(Src);
     Observer.changedInstr(MI);
   };
@@ -4259,6 +4388,14 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
   if (ShrAmt < 0 || ShrAmt >= Size)
     return false;

+  // If the shift subsumes the mask, emit the 0 directly.
+  if (0 == (SMask >> ShrAmt)) {
+    MatchInfo = [=](MachineIRBuilder &B) {
+      B.buildConstant(Dst, 0);
+    };
+    return true;
+  }
+
   // Check that ubfx can do the extraction, with no holes in the mask.
   uint64_t UMask = SMask;
   UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
@@ -4585,6 +4722,42 @@ bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
   return true;
 }

+bool CombinerHelper::matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // (G_*MULO x, 0) -> 0 + no carry out
+  assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
+         MI.getOpcode() == TargetOpcode::G_SMULO);
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register Carry = MI.getOperand(1).getReg();
+  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
+      !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+    return false;
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildConstant(Dst, 0);
+    B.buildConstant(Carry, 0);
+  };
+  return true;
+}
+
+bool CombinerHelper::matchAddOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  // (G_*ADDO x, 0) -> x + no carry out
+  assert(MI.getOpcode() == TargetOpcode::G_UADDO ||
+         MI.getOpcode() == TargetOpcode::G_SADDO);
+  if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
+    return false;
+  Register Carry = MI.getOperand(1).getReg();
+  if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
+    return false;
+  Register Dst = MI.getOperand(0).getReg();
+  Register LHS = MI.getOperand(2).getReg();
+  MatchInfo = [=](MachineIRBuilder &B) {
+    B.buildCopy(Dst, LHS);
+    B.buildConstant(Carry, 0);
+  };
+  return true;
+}
+
 MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UDIV);
   auto &UDiv = cast<GenericMachineInstr>(MI);
@@ -5376,6 +5549,106 @@ bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
   return false;
 }

+bool CombinerHelper::matchSelectToLogical(MachineInstr &MI,
+                                          BuildFnTy &MatchInfo) {
+  GSelect &Sel = cast<GSelect>(MI);
+  Register DstReg = Sel.getReg(0);
+  Register Cond = Sel.getCondReg();
+  Register TrueReg = Sel.getTrueReg();
+  Register FalseReg = Sel.getFalseReg();
+
+  auto *TrueDef = getDefIgnoringCopies(TrueReg, MRI);
+  auto *FalseDef = getDefIgnoringCopies(FalseReg, MRI);
+
+  const LLT CondTy = MRI.getType(Cond);
+  const LLT OpTy = MRI.getType(TrueReg);
+  if (CondTy != OpTy || OpTy.getScalarSizeInBits() != 1)
+    return false;
+
+  // We have a boolean select.
+
+  // select Cond, Cond, F --> or Cond, F
+  // select Cond, 1, F    --> or Cond, F
+  auto MaybeCstTrue = isConstantOrConstantSplatVector(*TrueDef, MRI);
+  if (Cond == TrueReg || (MaybeCstTrue && MaybeCstTrue->isOne())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, Cond, FalseReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, Cond --> and Cond, T
+  // select Cond, T, 0    --> and Cond, T
+  auto MaybeCstFalse = isConstantOrConstantSplatVector(*FalseDef, MRI);
+  if (Cond == FalseReg || (MaybeCstFalse && MaybeCstFalse->isZero())) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, Cond, TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, T, 1 --> or (not Cond), T
+  if (MaybeCstFalse && MaybeCstFalse->isOne()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildOr(DstReg, MIB.buildNot(OpTy, Cond), TrueReg);
+    };
+    return true;
+  }
+
+  // select Cond, 0, F --> and (not Cond), F
+  if (MaybeCstTrue && MaybeCstTrue->isZero()) {
+    MatchInfo = [=](MachineIRBuilder &MIB) {
+      MIB.buildAnd(DstReg, MIB.buildNot(OpTy, Cond), FalseReg);
+    };
+    return true;
+  }
+  return false;
+}
+
+bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
+                                            unsigned &IdxToPropagate) {
+  bool PropagateNaN;
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case TargetOpcode::G_FMINNUM:
+  case TargetOpcode::G_FMAXNUM:
+    PropagateNaN = false;
+    break;
+  case TargetOpcode::G_FMINIMUM:
+  case TargetOpcode::G_FMAXIMUM:
+    PropagateNaN = true;
+    break;
+  }
+
+  auto MatchNaN = [&](unsigned Idx) {
+    Register MaybeNaNReg = MI.getOperand(Idx).getReg();
+    const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
+    if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
+      return false;
+    IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
+    return true;
+  };
+
+  return MatchNaN(1) || MatchNaN(2);
+}
+
+bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
+  assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Helper lambda to check for opportunities for
+  //   A + (B - A) -> B
+  //   (B - A) + A -> B
+  auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
+    Register Reg;
+    return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
+           Reg == MaybeSameReg;
+  };
+  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 64c2f0d5f8e4..4f03af0fce82 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -567,6 +567,26 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
     Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
     break;
   }
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_USUBE:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_UMULO:
+  case TargetOpcode::G_SMULO: {
+    if (MI.getOperand(1).getReg() == R) {
+      // If we know the result of a compare has the top bits zero, use this
+      // info.
+      if (TL.getBooleanContents(DstTy.isVector(), false) ==
+              TargetLowering::ZeroOrOneBooleanContent &&
+          BitWidth > 1)
+        Known.Zero.setBitsFrom(1);
+    }
+    break;
+  }
   }

   assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -673,6 +693,27 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
                                MI.getOperand(3).getReg(), DemandedElts,
                                Depth + 1);
   }
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_USUBE:
+  case TargetOpcode::G_SMULO:
+  case TargetOpcode::G_UMULO: {
+    // If compares return 0/-1, all bits are sign bits.
+    // We know that we have an integer-based boolean since these operations
+    // are only available for integer.
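// --- [Editor's aside: illustrative sketch, not part of the upstream diff] ---
// The boolean-select identities used by matchSelectToLogical, checked
// exhaustively over i1 values:
#include <cassert>
int main() {
  for (bool C : {false, true})
    for (bool T : {false, true})
      for (bool F : {false, true}) {
        assert((C ? true : F) == (C || F));   // select C, 1, F -> or C, F
        assert((C ? T : false) == (C && T));  // select C, T, 0 -> and C, T
        assert((C ? T : true) == (!C || T));  // select C, T, 1 -> or !C, T
        assert((C ? false : F) == (!C && F)); // select C, 0, F -> and !C, F
      }
}
// ---------------------------------------------------------------------------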
+ if (MI.getOperand(1).getReg() == R) { + if (TL.getBooleanContents(DstTy.isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return TyBits; + } + + break; + } case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: default: { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index 252b931602c6..efcc40641ea8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/InitializePasses.h" -#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 847df84afba6..a2af66d28f4a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,10 +16,11 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" @@ -47,7 +48,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" @@ -78,7 +78,6 @@ #include "llvm/Transforms/Utils/MemoryOpRemark.h" #include <algorithm> #include <cassert> -#include <cstddef> #include <cstdint> #include <iterator> #include <string> @@ -1818,7 +1817,7 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) { bool IRTranslator::translateConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) { - fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID()); if (!Opcode) @@ -2252,6 +2251,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0}; return CLI->lowerCall(MIRBuilder, Info); } + case Intrinsic::fptrunc_round: { + unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI); + + // Convert the metadata argument to a constant integer + Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata(); + Optional<RoundingMode> RoundMode = + convertStrToRoundingMode(cast<MDString>(MD)->getString()); + + // Add the Rounding mode as an integer + MIRBuilder + .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND, + {getOrCreateVReg(CI)}, + {getOrCreateVReg(*CI.getArgOperand(0))}, Flags) + .addImm((int)*RoundMode); + + return true; + } #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" @@ -2409,7 +2425,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a 
GlobalISel version of getTgtMemIntrinsic. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - Align Alignment = Info.align.getValueOr( + Align Alignment = Info.align.value_or( DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext()))); LLT MemTy = Info.memVT.isSimple() ? getLLTForMVT(Info.memVT.getSimpleVT()) @@ -2934,15 +2950,6 @@ void IRTranslator::finishPendingPhis() { } } -bool IRTranslator::valueIsSplit(const Value &V, - SmallVectorImpl<uint64_t> *Offsets) { - SmallVector<LLT, 4> SplitTys; - if (Offsets && !Offsets->empty()) - Offsets->clear(); - computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets); - return SplitTys.size() > 1; -} - bool IRTranslator::translate(const Instruction &Inst) { CurBuilder->setDebugLoc(Inst.getDebugLoc()); @@ -2984,7 +2991,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { // Return the scalar if it is a <1 x Ty> vector. unsigned NumElts = CAZ->getElementCount().getFixedValue(); if (NumElts == 1) - return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get()); + return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder); SmallVector<Register, 4> Ops; for (unsigned I = 0; I < NumElts; ++I) { Constant &Elt = *CAZ->getElementValue(I); @@ -2994,8 +3001,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) { // Return the scalar if it is a <1 x Ty> vector. if (CV->getNumElements() == 1) - return translateCopy(C, *CV->getElementAsConstant(0), - *EntryBuilder.get()); + return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder); SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumElements(); ++i) { Constant &Elt = *CV->getElementAsConstant(i); @@ -3013,7 +3019,7 @@ bool IRTranslator::translate(const Constant &C, Register Reg) { } } else if (auto CV = dyn_cast<ConstantVector>(&C)) { if (CV->getNumOperands() == 1) - return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get()); + return translateCopy(C, *CV->getOperand(0), *EntryBuilder); SmallVector<Register, 4> Ops; for (unsigned i = 0; i < CV->getNumOperands(); ++i) { Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); @@ -3255,14 +3261,13 @@ bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD, return false; } - // On PS4, the "return address" must still be within the calling function, - // even if it's at the very end, so emit an explicit TRAP here. - // Passing 'true' for doesNotReturn above won't generate the trap for us. + // On PS4/PS5, the "return address" must still be within the calling + // function, even if it's at the very end, so emit an explicit TRAP here. // WebAssembly needs an unreachable instruction after a non-returning call, // because the function return type can be different from __stack_chk_fail's // return type (void). 
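A side note on the llvm.fptrunc.round hunk above: the metadata operand is parsed into a RoundingMode and then carried as a plain integer immediate on G_INTRINSIC_FPTRUNC_ROUND. A minimal standalone sketch of that mapping, using only the public FloatingPointMode helpers (the string literal is one of the documented metadata spellings):

    #include "llvm/ADT/FloatingPointMode.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // The intrinsic's metadata argument carries strings such as
      // "round.towardzero"; convertStrToRoundingMode parses them.
      Optional<RoundingMode> RM = convertStrToRoundingMode("round.towardzero");
      assert(RM && *RM == RoundingMode::TowardZero);
      return (int)*RM; // the value encoded as the instruction's immediate
    }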
const TargetMachine &TM = MF->getTarget(); - if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) { + if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) { LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n"); return false; } @@ -3413,7 +3418,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) { + if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); R << "unable to lower arguments: " << ore::NV("Prototype", F.getType()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index e5f95ca5aa73..95ae8383b6fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -12,15 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "inline-asm-lowering" @@ -150,6 +145,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_RegisterClass: return 2; case TargetLowering::C_Memory: + case TargetLowering::C_Address: return 3; } llvm_unreachable("Invalid constraint type"); @@ -310,7 +306,7 @@ bool InlineAsmLowering::lowerInlineAsm( // If this is an indirect operand, the operand is a pointer to the // accessed type. if (OpInfo.isIndirect) { - OpTy = Call.getAttributes().getParamElementType(ArgNo); + OpTy = Call.getParamElementType(ArgNo); assert(OpTy && "Indirect operand must have elementtype attribute"); } @@ -649,6 +645,8 @@ bool InlineAsmLowering::lowerInlineAsm( return false; case TargetLowering::C_Memory: break; // Already handled. + case TargetLowering::C_Address: + break; // Silence warning. 
case TargetLowering::C_Unknown: LLVM_DEBUG(dbgs() << "Unexpected unknown constraint\n"); return false; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 2bb5addefe48..28f3b425c67d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,8 +12,6 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" @@ -23,14 +21,13 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/config.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/CodeGenCoverage.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 1d0c106fd5db..8959d215ecd1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -13,16 +13,9 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include <cassert> #define DEBUG_TYPE "instructionselector" @@ -66,6 +59,10 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, std::next(MI.getIterator()) == IntoMI.getIterator()) return true; + // Convergent instructions cannot be moved in the CFG. 
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent()) + return false; + return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp index 1f0738a8d9d2..54a82cac95d5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp @@ -188,6 +188,13 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) { }; } +LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) { + return [=](const LegalityQuery &Query) { + const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy; + return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes()); + }; +} + LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) { return [=](const LegalityQuery &Query) { const LLT QueryTy = Query.Types[TypeIdx]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp index 75b7fcb5663a..25c1db91b05d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp @@ -43,6 +43,27 @@ LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx, }; } +LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, + unsigned FromTypeIdx) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + const LLT NewTy = Query.Types[FromTypeIdx]; + ElementCount NewEltCount = + NewTy.isVector() ? NewTy.getElementCount() : ElementCount::getFixed(1); + return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + }; +} + +LegalizeMutation LegalizeMutations::changeElementCountTo(unsigned TypeIdx, + LLT NewEltTy) { + return [=](const LegalityQuery &Query) { + const LLT OldTy = Query.Types[TypeIdx]; + ElementCount NewEltCount = NewEltTy.isVector() ? 
NewEltTy.getElementCount() + : ElementCount::getFixed(1); + return std::make_pair(TypeIdx, OldTy.changeElementCount(NewEltCount)); + }; +} + LegalizeMutation LegalizeMutations::changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx) { return [=](const LegalityQuery &Query) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0ab4a7f64840..f09e5b7ce783 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" @@ -24,15 +24,11 @@ #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" -#include "llvm/Target/TargetMachine.h" - -#include <iterator> #define DEBUG_TYPE "legalizer" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 37bc8a65dc7c..fb046d519ac8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -15,10 +15,13 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -1611,40 +1614,6 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } -Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { - Register WideReg = MRI.createGenericVirtualRegister(WideTy); - LLT OrigTy = MRI.getType(OrigReg); - LLT LCMTy = getLCMType(WideTy, OrigTy); - - const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits(); - const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits(); - - Register UnmergeSrc = WideReg; - - // Create a merge to the LCM type, padding with undef - // %0:_(<3 x s32>) = G_FOO => <4 x s32> - // => - // %1:_(<4 x s32>) = G_FOO - // %2:_(<4 x s32>) = G_IMPLICIT_DEF - // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2 - // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3 - if (NumMergeParts > 1) { - Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0); - SmallVector<Register, 8> MergeParts(NumMergeParts, Undef); - MergeParts[0] = WideReg; - UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0); - } - - // Unmerge to 
the original register and pad with dead defs. - SmallVector<Register, 8> UnmergeResults(NumUnmergeParts); - UnmergeResults[0] = OrigReg; - for (int I = 1; I != NumUnmergeParts; ++I) - UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy); - - MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc); - return WideReg; -} - LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { @@ -1867,9 +1836,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; // TODO - unsigned Opcode; unsigned ExtOpcode; Optional<Register> CarryIn = None; @@ -1914,6 +1880,18 @@ LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx, break; } + if (TypeIdx == 1) { + unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false); + + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 1); + if (CarryIn) + widenScalarSrc(MI, WideTy, 4, BoolExtOp); + + Observer.changedInstr(MI); + return Legalized; + } + auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)}); auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)}); // Do the arithmetic in the larger type. @@ -1985,8 +1963,12 @@ LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - if (TypeIdx == 1) - return UnableToLegalize; + if (TypeIdx == 1) { + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 1); + Observer.changedInstr(MI); + return Legalized; + } bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO; Register Result = MI.getOperand(0).getReg(); @@ -2992,7 +2974,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) { if (isa<GSExtLoad>(LoadMI)) { auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits); - } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) { + } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) { auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO); // The extra bits are guaranteed to be zero, since we stored them that // way. A zext load from Wide thus automatically gives zext from MemVT. @@ -3314,7 +3296,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { Observer.changingInstr(MI); const auto &TII = MIRBuilder.getTII(); MI.setDesc(TII.get(TargetOpcode::G_MUL)); - MI.RemoveOperand(1); + MI.removeOperand(1); Observer.changedInstr(MI); auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS}); @@ -4096,13 +4078,14 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, // is a load, return the new registers in ValRegs. For a store, each element // of ValRegs should be PartTy. Returns the next offset that needs to be // handled.
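To see what the reworked splitTypePieces in the following hunk computes, here is a standalone sketch of the offset walk with assumed sizes (an s64 access split into four s16 parts); on big-endian targets the first part now starts at the high end and the offset steps down, mirroring memory layout:

    #include <cstdio>

    int main() {
      const unsigned TotalSize = 64, PartSize = 16, NumParts = 4;
      for (bool IsBigEndian : {false, true}) {
        unsigned Offset = IsBigEndian ? TotalSize - PartSize : 0;
        std::printf("%s-endian bit offsets:", IsBigEndian ? "big" : "little");
        for (unsigned Idx = 0; Idx != NumParts; ++Idx) {
          std::printf(" %u", Offset);
          Offset = IsBigEndian ? Offset - PartSize : Offset + PartSize;
        }
        std::printf("\n"); // little: 0 16 32 48 -- big: 48 32 16 0
      }
      return 0;
    }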
+ bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian(); auto MMO = LdStMI.getMMO(); auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs, - unsigned Offset) -> unsigned { + unsigned NumParts, unsigned Offset) -> unsigned { MachineFunction &MF = MIRBuilder.getMF(); unsigned PartSize = PartTy.getSizeInBits(); for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; - Offset += PartSize, ++Idx) { + ++Idx) { unsigned ByteOffset = Offset / 8; Register NewAddrReg; @@ -4118,16 +4101,19 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, } else { MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); } + Offset = isBigEndian ? Offset - PartSize : Offset + PartSize; } return Offset; }; - unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); + unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0; + unsigned HandledOffset = + splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset); // Handle the rest of the register if this isn't an even type breakdown. if (LeftoverTy.isValid()) - splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); + splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset); if (IsLoad) { insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, @@ -4236,6 +4222,14 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTTOPTR: case G_PTRTOINT: case G_ADDRSPACE_CAST: + case G_UADDO: + case G_USUBO: + case G_UADDE: + case G_USUBE: + case G_SADDO: + case G_SSUBO: + case G_SADDE: + case G_SSUBE: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: @@ -4882,10 +4876,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SELECT: - if (TypeIdx != 0) - return UnableToLegalize; - if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + case TargetOpcode::G_SELECT: { + Register DstReg = MI.getOperand(0).getReg(); + Register CondReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT CondTy = MRI.getType(CondReg); + if (TypeIdx == 1) { + if (!CondTy.isScalar() || + DstTy.getElementCount() != MoreTy.getElementCount()) + return UnableToLegalize; + + // This is turning a scalar select of vectors into a vector + // select. Broadcast the select condition. + auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(ShufSplat.getReg(0)); + Observer.changedInstr(MI); + return Legalized; + } + + if (CondTy.isVector()) return UnableToLegalize; Observer.changingInstr(MI); @@ -4894,6 +4904,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + } case TargetOpcode::G_UNMERGE_VALUES: return UnableToLegalize; case TargetOpcode::G_PHI: @@ -7229,25 +7240,32 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) { Register Op2Reg = MI.getOperand(3).getReg(); LLT DstTy = MRI.getType(DstReg); LLT MaskTy = MRI.getType(MaskReg); - LLT Op1Ty = MRI.getType(Op1Reg); if (!DstTy.isVector()) return UnableToLegalize; - // Vector selects can have a scalar predicate. If so, splat into a vector and - // finish for later legalization attempts to try again. if (MaskTy.isScalar()) { + // Turn the scalar condition into a vector condition mask. 
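The scalar-mask normalization spelled out over the next few lines of lowerSelect boils down to ordinary shift arithmetic; a runnable sketch, assuming a condition that was zero-extended from i1 into a wider scalar:

    #include <cstdint>
    #include <cstdio>

    // Reproduces G_SEXT_INREG of the low Bits bits using shifts.
    static int32_t signExtendInReg(int32_t V, unsigned Bits) {
      const unsigned Shift = 32 - Bits;
      return (int32_t)((uint32_t)V << Shift) >> Shift;
    }

    int main() {
      int32_t Mask = 1;                          // zero-extended i1 "true"
      int32_t Elt = signExtendInReg(Mask, 1);    // -> -1, i.e. all ones
      std::printf("splat element: %d\n", Elt);   // every lane selects Op1
      return 0;
    }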
+ Register MaskElt = MaskReg; - if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits()) - MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0); - // Generate a vector splat idiom to be pattern matched later. + + // The condition was potentially zero extended before, but we want a sign + // extended boolean. + if (MaskTy.getSizeInBits() <= DstTy.getScalarSizeInBits() && + MaskTy != LLT::scalar(1)) { + MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0); + } + + // Continue the sign extension (or truncate) to match the data type. + MaskElt = MIRBuilder.buildSExtOrTrunc(DstTy.getElementType(), + MaskElt).getReg(0); + + // Generate a vector splat idiom. auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt); - Observer.changingInstr(MI); - MI.getOperand(1).setReg(ShufSplat.getReg(0)); - Observer.changedInstr(MI); - return Legalized; + MaskReg = ShufSplat.getReg(0); + MaskTy = DstTy; } - if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) { + if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) { return UnableToLegalize; } @@ -7414,7 +7432,7 @@ static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { unsigned NumBits = Ty.getScalarSizeInBits(); auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI); if (!Ty.isVector() && ValVRegAndVal) { - APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8); + APInt Scalar = ValVRegAndVal->Value.trunc(8); APInt SplatVal = APInt::getSplat(NumBits, Scalar); return MIB.buildConstant(Ty, SplatVal).getReg(0); } @@ -7569,7 +7587,7 @@ LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) { // See if this is a constant length copy auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI); // FIXME: support dynamically sized G_MEMCPY_INLINE - assert(LenVRegAndVal.hasValue() && + assert(LenVRegAndVal && "inline memcpy with dynamic size is not yet supported"); uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { @@ -7609,7 +7627,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -7644,7 +7662,7 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { Alignment = NewAlign; @@ -7717,7 +7735,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); bool OptSize = shouldLowerMemFuncForSize(MF); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) @@ -7752,7 +7770,7 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 
2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { Alignment = NewAlign; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 30697913a6a4..6adb7ddb5b66 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,9 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MathExtras.h" #include <algorithm> -#include <map> using namespace llvm; using namespace LegalizeActions; @@ -132,15 +129,16 @@ static bool mutationIsSane(const LegalizeRule &Rule, LLVM_FALLTHROUGH; case MoreElements: { // MoreElements can go from scalar to vector. - const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1; + const ElementCount OldElts = OldTy.isVector() ? + OldTy.getElementCount() : ElementCount::getFixed(1); if (NewTy.isVector()) { if (Rule.getAction() == FewerElements) { // Make sure the element count really decreased. - if (NewTy.getNumElements() >= OldElts) + if (ElementCount::isKnownGE(NewTy.getElementCount(), OldElts)) return false; } else { // Make sure the element count really increased. - if (NewTy.getNumElements() <= OldElts) + if (ElementCount::isKnownLE(NewTy.getElementCount(), OldElts)) return false; } } else if (Rule.getAction() == MoreElements) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp index de8dbd456901..d4fbf7d15089 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp @@ -73,6 +73,7 @@ void LoadStoreOpt::init(MachineFunction &MF) { void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); + AU.setPreservesAll(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -508,6 +509,12 @@ bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI, if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits()) return false; + // Avoid adding volatile or ordered stores to the candidate. We already have a + // check for this in instMayAlias() but that only gets called later, between + // potential aliasing hazards.
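Returning to the mutationIsSane change in LegalizerInfo.cpp above: raw getNumElements() comparisons stop making sense once scalable vectors are involved, which is exactly what the isKnownGE/isKnownLE forms capture. A small sketch against the llvm::ElementCount API (LLVM headers assumed):

    #include "llvm/Support/TypeSize.h"
    #include <cassert>
    using llvm::ElementCount;

    int main() {
      ElementCount Fixed4 = ElementCount::getFixed(4);    // <4 x sN>
      ElementCount Scal2 = ElementCount::getScalable(2);  // <vscale x 2 x sN>
      ElementCount Scal4 = ElementCount::getScalable(4);  // <vscale x 4 x sN>
      // Within one scalability the order is decidable:
      assert(ElementCount::isKnownGE(Scal4, Scal2));
      // Across scalabilities it generally is not (only vscale >= 1 is known),
      // so the sanity check conservatively rejects such mutations:
      assert(!ElementCount::isKnownGE(Fixed4, Scal4));
      return 0;
    }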
+ if (!StoreMI.isSimple()) + return false; + Register StoreAddr = StoreMI.getPointerReg(); auto BIO = getPointerInfo(StoreAddr, *MRI); Register StoreBase = BIO.BaseReg; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index 328a278f3d68..c1287693e74d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/InitializePasses.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index c6720568b362..19ebf46191a9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -9,8 +9,6 @@ /// This file implements the MachineIRBuidler class. //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -19,7 +17,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; @@ -568,47 +566,6 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst, return Extract; } -void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops, - ArrayRef<uint64_t> Indices) { -#ifndef NDEBUG - assert(Ops.size() == Indices.size() && "incompatible args"); - assert(!Ops.empty() && "invalid trivial sequence"); - assert(llvm::is_sorted(Indices) && - "sequence offsets must be in ascending order"); - - assert(getMRI()->getType(Res).isValid() && "invalid operand type"); - for (auto Op : Ops) - assert(getMRI()->getType(Op).isValid() && "invalid operand type"); -#endif - - LLT ResTy = getMRI()->getType(Res); - LLT OpTy = getMRI()->getType(Ops[0]); - unsigned OpSize = OpTy.getSizeInBits(); - bool MaybeMerge = true; - for (unsigned i = 0; i < Ops.size(); ++i) { - if (getMRI()->getType(Ops[i]) != OpTy || Indices[i] != i * OpSize) { - MaybeMerge = false; - break; - } - } - - if (MaybeMerge && Ops.size() * OpSize == ResTy.getSizeInBits()) { - buildMerge(Res, Ops); - return; - } - - Register ResIn = getMRI()->createGenericVirtualRegister(ResTy); - buildUndef(ResIn); - - for (unsigned i = 0; i < Ops.size(); ++i) { - Register ResOut = i + 1 == Ops.size() - ? 
Res - : getMRI()->createGenericVirtualRegister(ResTy); - buildInsert(ResOut, ResIn, Ops[i], Indices[i]); - ResIn = ResOut; - } -} - MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) { return buildInstr(TargetOpcode::G_IMPLICIT_DEF, {Res}, {}); } @@ -666,6 +623,17 @@ MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res, return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); } +MachineInstrBuilder +MachineIRBuilder::buildBuildVectorConstant(const DstOp &Res, + ArrayRef<APInt> Ops) { + SmallVector<SrcOp> TmpVec; + TmpVec.reserve(Ops.size()); + LLT EltTy = Res.getLLTTy(*getMRI()).getElementType(); + for (auto &Op : Ops) + TmpVec.push_back(buildConstant(EltTy, Op)); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec); +} + MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res, const SrcOp &Src) { SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 01af6bb51bb7..bce850ee212c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -14,8 +14,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -25,12 +23,13 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -631,7 +630,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { "Unexpected hint opcode!"); // The only correct mapping for these is to always use the source register // bank. - const RegisterBank *RB = MRI->getRegBankOrNull(MI.getOperand(1).getReg()); + const RegisterBank *RB = + RBI->getRegBank(MI.getOperand(1).getReg(), *MRI, *TRI); // We can assume every instruction above this one has a selected register // bank. 
assert(RB && "Expected source register to have a register bank?"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 544af9a2954f..7781761bc131 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -16,14 +16,14 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #define DEBUG_TYPE "globalisel-utils" @@ -56,6 +57,11 @@ Register llvm::constrainOperandRegClass( // Assume physical registers are properly constrained. assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); + // Save the old register class to check whether + // the change notifications will be required. + // TODO: A better approach would be to pass + // the observers to constrainRegToClass(). + auto *OldRegClass = MRI.getRegClassOrNull(Reg); Register ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible // then create a copy between the new and the old register. 
@@ -81,7 +87,7 @@ Register llvm::constrainOperandRegClass( if (GISelChangeObserver *Observer = MF.getObserver()) { Observer->changedInstr(*RegMO.getParent()); } - } else { + } else if (OldRegClass != MRI.getRegClassOrNull(Reg)) { if (GISelChangeObserver *Observer = MF.getObserver()) { if (!RegMO.isDef()) { MachineInstr *RegDef = MRI.getVRegDef(Reg); @@ -500,6 +506,7 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, default: break; case TargetOpcode::G_ADD: + case TargetOpcode::G_PTR_ADD: return C1 + C2; case TargetOpcode::G_AND: return C1 & C2; @@ -533,6 +540,14 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1, if (!C2.getBoolValue()) break; return C1.srem(C2); + case TargetOpcode::G_SMIN: + return APIntOps::smin(C1, C2); + case TargetOpcode::G_SMAX: + return APIntOps::smax(C1, C2); + case TargetOpcode::G_UMIN: + return APIntOps::umin(C1, C2); + case TargetOpcode::G_UMAX: + return APIntOps::umax(C1, C2); } return None; @@ -592,33 +607,27 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, return None; } -Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, - const Register Op2, - const MachineRegisterInfo &MRI, - MachineIRBuilder &MIB) { +SmallVector<APInt> +llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, + const Register Op2, + const MachineRegisterInfo &MRI) { auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI); if (!SrcVec2) - return Register(); + return SmallVector<APInt>(); auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI); if (!SrcVec1) - return Register(); + return SmallVector<APInt>(); - const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0)); - - SmallVector<Register, 16> FoldedElements; + SmallVector<APInt> FoldedElements; for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) { auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx), SrcVec2->getSourceReg(Idx), MRI); if (!MaybeCst) - return Register(); - auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0); - FoldedElements.emplace_back(FoldedCstReg); + return SmallVector<APInt>(); + FoldedElements.push_back(*MaybeCst); } - // Create the new vector constant. 
- auto CstVec = - MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements); - return CstVec.getReg(0); + return FoldedElements; } bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -1061,15 +1070,38 @@ bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI, AllowUndef); } +Optional<APInt> llvm::getIConstantSplatVal(const Register Reg, + const MachineRegisterInfo &MRI) { + if (auto SplatValAndReg = + getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) { + Optional<ValueAndVReg> ValAndVReg = + getIConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI); + return ValAndVReg->Value; + } + + return None; +} + +Optional<APInt> getIConstantSplatVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return getIConstantSplatVal(MI.getOperand(0).getReg(), MRI); +} + Optional<int64_t> -llvm::getBuildVectorConstantSplat(const MachineInstr &MI, - const MachineRegisterInfo &MRI) { +llvm::getIConstantSplatSExtVal(const Register Reg, + const MachineRegisterInfo &MRI) { if (auto SplatValAndReg = - getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false)) + getAnyConstantSplat(Reg, MRI, /* AllowUndef */ false)) return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI); return None; } +Optional<int64_t> +llvm::getIConstantSplatSExtVal(const MachineInstr &MI, + const MachineRegisterInfo &MRI) { + return getIConstantSplatSExtVal(MI.getOperand(0).getReg(), MRI); +} + Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef) { @@ -1095,7 +1127,7 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, unsigned Opc = MI.getOpcode(); if (!isBuildVectorOp(Opc)) return None; - if (auto Splat = getBuildVectorConstantSplat(MI, MRI)) + if (auto Splat = getIConstantSplatSExtVal(MI, MRI)) return RegOrConstant(*Splat); auto Reg = MI.getOperand(1).getReg(); if (any_of(make_range(MI.operands_begin() + 2, MI.operands_end()), @@ -1104,6 +1136,26 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI, return RegOrConstant(Reg); } +static bool isConstantScalar(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowFP = true, + bool AllowOpaqueConstants = true) { + switch (MI.getOpcode()) { + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: + return true; + case TargetOpcode::G_FCONSTANT: + return AllowFP; + case TargetOpcode::G_GLOBAL_VALUE: + case TargetOpcode::G_FRAME_INDEX: + case TargetOpcode::G_BLOCK_ADDR: + case TargetOpcode::G_JUMP_TABLE: + return AllowOpaqueConstants; + default: + return false; + } +} + bool llvm::isConstantOrConstantVector(MachineInstr &MI, const MachineRegisterInfo &MRI) { Register Def = MI.getOperand(0).getReg(); @@ -1121,19 +1173,71 @@ bool llvm::isConstantOrConstantVector(MachineInstr &MI, return true; } +bool llvm::isConstantOrConstantVector(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowFP, bool AllowOpaqueConstants) { + if (isConstantScalar(MI, MRI, AllowFP, AllowOpaqueConstants)) + return true; + + if (!isBuildVectorOp(MI.getOpcode())) + return false; + + const unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I != NumOps; ++I) { + const MachineInstr *ElementDef = MRI.getVRegDef(MI.getOperand(I).getReg()); + if (!isConstantScalar(*ElementDef, MRI, AllowFP, AllowOpaqueConstants)) + return false; + } + + return true; +} + Optional<APInt> llvm::isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI) { Register Def = MI.getOperand(0).getReg(); if (auto C = 
getIConstantVRegValWithLookThrough(Def, MRI)) return C->Value; - auto MaybeCst = getBuildVectorConstantSplat(MI, MRI); + auto MaybeCst = getIConstantSplatSExtVal(MI, MRI); if (!MaybeCst) return None; const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits(); return APInt(ScalarSize, *MaybeCst, true); } +bool llvm::isNullOrNullSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, bool AllowUndefs) { + switch (MI.getOpcode()) { + case TargetOpcode::G_IMPLICIT_DEF: + return AllowUndefs; + case TargetOpcode::G_CONSTANT: + return MI.getOperand(1).getCImm()->isNullValue(); + case TargetOpcode::G_FCONSTANT: { + const ConstantFP *FPImm = MI.getOperand(1).getFPImm(); + return FPImm->isZero() && !FPImm->isNegative(); + } + default: + if (!AllowUndefs) // TODO: isBuildVectorAllZeros assumes undef is OK already + return false; + return isBuildVectorAllZeros(MI, MRI); + } +} + +bool llvm::isAllOnesOrAllOnesSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + bool AllowUndefs) { + switch (MI.getOpcode()) { + case TargetOpcode::G_IMPLICIT_DEF: + return AllowUndefs; + case TargetOpcode::G_CONSTANT: + return MI.getOperand(1).getCImm()->isAllOnesValue(); + default: + if (!AllowUndefs) // TODO: isBuildVectorAllOnes assumes undef is OK already + return false; + return isBuildVectorAllOnes(MI, MRI); + } +} + bool llvm::matchUnaryPredicate( const MachineRegisterInfo &MRI, Register Reg, std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp index bbd9006a5d8c..f5833d3b9086 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/GlobalMerge.cpp @@ -592,6 +592,13 @@ void GlobalMerge::setMustKeepGlobalVariables(Module &M) { if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(U->stripPointerCasts())) MustKeepGlobalVariables.insert(GV); + else if (const ConstantArray *CA = dyn_cast<ConstantArray>(U->stripPointerCasts())) { + for (const Use &Elt : CA->operands()) { + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(Elt->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } } } } @@ -609,6 +616,13 @@ bool GlobalMerge::doInitialization(Module &M) { bool Changed = false; setMustKeepGlobalVariables(M); + LLVM_DEBUG({ + dbgs() << "Number of GV that must be kept: " << + MustKeepGlobalVariables.size() << "\n"; + for (auto KeptGV = MustKeepGlobalVariables.begin(); + KeptGV != MustKeepGlobalVariables.end(); KeptGV++) + dbgs() << "Kept: " << **KeptGV << "\n"; + }); // Grab all non-const globals. 
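On the GlobalMerge::setMustKeepGlobalVariables hunk above: the new branch matters for users like the initializer of @llvm.used, a ConstantArray of possibly pointer-cast globals. A hedged sketch of that walk in isolation (not the pass's exact code):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    using namespace llvm;

    // Collect every global a ConstantArray initializer references, looking
    // through casts the same way stripPointerCasts() does in the pass.
    static void collectKept(const ConstantArray *CA,
                            SmallPtrSetImpl<const GlobalVariable *> &Kept) {
      for (const Use &Elt : CA->operands())
        if (const auto *GV = dyn_cast<GlobalVariable>(Elt->stripPointerCasts()))
          Kept.insert(GV);
    }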
for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only diff --git a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp index 83b8c2d0eacb..67d6a3df7807 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp @@ -23,10 +23,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -37,7 +35,6 @@ #include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp index 1b20d1da20ad..105ab908d3fa 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IfConversion.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -28,16 +29,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 2d38a44d5a33..5be98e114673 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -32,17 +32,13 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp index c975013db8c8..06c660807c5c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InlineSpiller.cpp @@ 
-23,7 +23,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStacks.h" @@ -686,7 +685,7 @@ void InlineSpiller::reMaterializeAll() { // Remove any values that were completely rematted. for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); - for (VNInfo *VNI : llvm::make_range(LI.vni_begin(), LI.vni_end())) { + for (VNInfo *VNI : LI.vnis()) { if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) continue; MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); @@ -839,6 +838,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, unsigned Idx = OpPair.second; assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); + + // No point restoring an undef read, and we'll produce an invalid live + // interval. + // TODO: Is this really the correct way to handle undef tied uses? + if (MO.isUse() && !MO.readsReg() && !MO.isTied()) + continue; + if (MO.isImplicit()) { ImpReg = MO.getReg(); continue; @@ -964,7 +970,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, if (!MO.isReg() || !MO.isImplicit()) break; if (MO.getReg() == ImpReg) - FoldMI->RemoveOperand(i - 1); + FoldMI->removeOperand(i - 1); } LLVM_DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, @@ -1608,7 +1614,7 @@ void HoistSpillHelper::hoistAllSpills() { for (unsigned i = RMEnt->getNumOperands(); i; --i) { MachineOperand &MO = RMEnt->getOperand(i - 1); if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead()) - RMEnt->RemoveOperand(i - 1); + RMEnt->removeOperand(i - 1); } } Edit.eliminateDeadDefs(SpillsToRm, None, AA); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h index ace1691c1363..97464da9f17b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterferenceCache.h @@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache { SlotIndex First; SlotIndex Last; - BlockInterference() {} + BlockInterference() = default; }; /// Entry - A cache entry containing interference information for all aliases diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 5a20580e5479..b3f38a3b53f3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -46,6 +46,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -57,7 +58,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 230c6846dde2..43858071025a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ 
-19,7 +19,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -31,9 +30,8 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -173,10 +171,10 @@ class Polynomial { }; /// Number of Error Bits e - unsigned ErrorMSBs; + unsigned ErrorMSBs = (unsigned)-1; /// Value - Value *V; + Value *V = nullptr; /// Coefficient B SmallVector<std::pair<BOps, APInt>, 4> B; @@ -185,7 +183,7 @@ class Polynomial { APInt A; public: - Polynomial(Value *V) : ErrorMSBs((unsigned)-1), V(V) { + Polynomial(Value *V) : V(V) { IntegerType *Ty = dyn_cast<IntegerType>(V->getType()); if (Ty) { ErrorMSBs = 0; @@ -195,12 +193,12 @@ public: } Polynomial(const APInt &A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(A) {} + : ErrorMSBs(ErrorMSBs), A(A) {} Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0) - : ErrorMSBs(ErrorMSBs), V(nullptr), A(BitWidth, A) {} + : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {} - Polynomial() : ErrorMSBs((unsigned)-1), V(nullptr) {} + Polynomial() = default; /// Increment and clamp the number of undefined bits. void incErrorMSBs(unsigned amt) { @@ -1206,9 +1204,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, ->getNumElements(); FixedVectorType *ILTy = FixedVectorType::get(ETy, Factor * ElementsPerSVI); - SmallVector<unsigned, 4> Indices; - for (unsigned i = 0; i < Factor; i++) - Indices.push_back(i); + auto Indices = llvm::to_vector<4>(llvm::seq<unsigned>(0, Factor)); InterleavedCost = TTI.getInterleavedMemoryOpCost( Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(), InsertionPoint->getPointerAddressSpace(), CostKind); @@ -1228,7 +1224,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, auto MSSAU = MemorySSAUpdater(&MSSA); MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore( LI, nullptr, MSSA.getMemoryAccess(InsertionPoint))); - MSSAU.insertUse(MSSALoad); + MSSAU.insertUse(MSSALoad, /*RenameUses=*/ true); // Create the final SVIs and replace all uses. int i = 0; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp new file mode 100644 index 000000000000..23220872b532 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/JMCInstrumenter.cpp @@ -0,0 +1,233 @@ +//===- JMCInstrumenter.cpp - JMC Instrumentation --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// JMCInstrumenter pass: +// - instrument each function with a call to __CheckForDebuggerJustMyCode. The +// sole argument should be defined in .msvcjmc. Each flag is 1 byte initialized +// to 1. +// - create the dummy COMDAT function __JustMyCode_Default to prevent linking +// error if __CheckForDebuggerJustMyCode is not available.
+// - For MSVC: +// add "/alternatename:__CheckForDebuggerJustMyCode=__JustMyCode_Default" to +// "llvm.linker.options" +// For ELF: +// Rename __JustMyCode_Default to __CheckForDebuggerJustMyCode and mark it as +// weak symbol. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/DJB.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "jmc-instrument" + +namespace { +struct JMCInstrumenter : public ModulePass { + static char ID; + JMCInstrumenter() : ModulePass(ID) { + initializeJMCInstrumenterPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +}; +char JMCInstrumenter::ID = 0; +} // namespace + +INITIALIZE_PASS( + JMCInstrumenter, DEBUG_TYPE, + "Instrument function entry with call to __CheckForDebuggerJustMyCode", + false, false) + +ModulePass *llvm::createJMCInstrumenterPass() { return new JMCInstrumenter(); } + +namespace { +const char CheckFunctionName[] = "__CheckForDebuggerJustMyCode"; + +std::string getFlagName(DISubprogram &SP, bool UseX86FastCall) { + // absolute windows path: windows_backslash + // relative windows backslash path: windows_backslash + // relative windows slash path: posix + // absolute posix path: posix + // relative posix path: posix + sys::path::Style PathStyle = + has_root_name(SP.getDirectory(), sys::path::Style::windows_backslash) || + SP.getDirectory().contains("\\") || + SP.getFilename().contains("\\") + ? sys::path::Style::windows_backslash + : sys::path::Style::posix; + // Best effort path normalization. This is to guarantee a unique flag symbol + // is produced for the same directory. Some builds may want to use relative + // paths, or paths with a specific prefix (see the -fdebug-compilation-dir + // flag), so only hash paths in debuginfo. Don't expand them to absolute + // paths. + SmallString<256> FilePath(SP.getDirectory()); + sys::path::append(FilePath, PathStyle, SP.getFilename()); + sys::path::native(FilePath, PathStyle); + sys::path::remove_dots(FilePath, /*remove_dot_dot=*/true, PathStyle); + + // The naming convention for the flag name is __<hash>_<file name> with '.' in + // <file name> replaced with '@'. For example C:\file.any.c would have a flag + // __D032E919_file@any@c. The naming convention matches MSVC's format; however, + // the match is not required to make JMC work. The hashing function used here + // is different from MSVC's. + + std::string Suffix; + for (auto C : sys::path::filename(FilePath, PathStyle)) + Suffix.push_back(C == '.' ? '@' : C); + + sys::path::remove_filename(FilePath, PathStyle); + return (UseX86FastCall ?
"_" : "__") + + utohexstr(djbHash(FilePath), /*LowerCase=*/false, + /*Width=*/8) + + "_" + Suffix; +} + +void attachDebugInfo(GlobalVariable &GV, DISubprogram &SP) { + Module &M = *GV.getParent(); + DICompileUnit *CU = SP.getUnit(); + assert(CU); + DIBuilder DB(M, false, CU); + + auto *DType = + DB.createBasicType("unsigned char", 8, dwarf::DW_ATE_unsigned_char, + llvm::DINode::FlagArtificial); + + auto *DGVE = DB.createGlobalVariableExpression( + CU, GV.getName(), /*LinkageName=*/StringRef(), SP.getFile(), + /*LineNo=*/0, DType, /*IsLocalToUnit=*/true, /*IsDefined=*/true); + GV.addMetadata(LLVMContext::MD_dbg, *DGVE); + DB.finalize(); +} + +FunctionType *getCheckFunctionType(LLVMContext &Ctx) { + Type *VoidTy = Type::getVoidTy(Ctx); + PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + return FunctionType::get(VoidTy, VoidPtrTy, false); +} + +Function *createDefaultCheckFunction(Module &M, bool UseX86FastCall) { + LLVMContext &Ctx = M.getContext(); + const char *DefaultCheckFunctionName = + UseX86FastCall ? "_JustMyCode_Default" : "__JustMyCode_Default"; + // Create the function. + Function *DefaultCheckFunc = + Function::Create(getCheckFunctionType(Ctx), GlobalValue::ExternalLinkage, + DefaultCheckFunctionName, &M); + DefaultCheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + DefaultCheckFunc->addParamAttr(0, Attribute::NoUndef); + if (UseX86FastCall) + DefaultCheckFunc->addParamAttr(0, Attribute::InReg); + + BasicBlock *EntryBB = BasicBlock::Create(Ctx, "", DefaultCheckFunc); + ReturnInst::Create(Ctx, EntryBB); + return DefaultCheckFunc; +} +} // namespace + +bool JMCInstrumenter::runOnModule(Module &M) { + bool Changed = false; + LLVMContext &Ctx = M.getContext(); + Triple ModuleTriple(M.getTargetTriple()); + bool IsMSVC = ModuleTriple.isKnownWindowsMSVCEnvironment(); + bool IsELF = ModuleTriple.isOSBinFormatELF(); + assert((IsELF || IsMSVC) && "Unsupported triple for JMC"); + bool UseX86FastCall = IsMSVC && ModuleTriple.getArch() == Triple::x86; + const char *const FlagSymbolSection = IsELF ? ".just.my.code" : ".msvcjmc"; + + GlobalValue *CheckFunction = nullptr; + DenseMap<DISubprogram *, Constant *> SavedFlags(8); + for (auto &F : M) { + if (F.isDeclaration()) + continue; + auto *SP = F.getSubprogram(); + if (!SP) + continue; + + Constant *&Flag = SavedFlags[SP]; + if (!Flag) { + std::string FlagName = getFlagName(*SP, UseX86FastCall); + IntegerType *FlagTy = Type::getInt8Ty(Ctx); + Flag = M.getOrInsertGlobal(FlagName, FlagTy, [&] { + // FIXME: Put the GV in comdat and have linkonce_odr linkage to save + // .msvcjmc section space? maybe not worth it. 
+        GlobalVariable *GV = new GlobalVariable(
+            M, FlagTy, /*isConstant=*/false, GlobalValue::InternalLinkage,
+            ConstantInt::get(FlagTy, 1), FlagName);
+        GV->setSection(FlagSymbolSection);
+        GV->setAlignment(Align(1));
+        GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+        attachDebugInfo(*GV, *SP);
+        return GV;
+      });
+    }
+
+    if (!CheckFunction) {
+      Function *DefaultCheckFunc =
+          createDefaultCheckFunction(M, UseX86FastCall);
+      if (IsELF) {
+        DefaultCheckFunc->setName(CheckFunctionName);
+        DefaultCheckFunc->setLinkage(GlobalValue::WeakAnyLinkage);
+        CheckFunction = DefaultCheckFunc;
+      } else {
+        assert(!M.getFunction(CheckFunctionName) &&
+               "JMC instrument more than once?");
+        auto *CheckFunc = cast<Function>(
+            M.getOrInsertFunction(CheckFunctionName, getCheckFunctionType(Ctx))
+                .getCallee());
+        CheckFunc->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+        CheckFunc->addParamAttr(0, Attribute::NoUndef);
+        if (UseX86FastCall) {
+          CheckFunc->setCallingConv(CallingConv::X86_FastCall);
+          CheckFunc->addParamAttr(0, Attribute::InReg);
+        }
+        CheckFunction = CheckFunc;
+
+        StringRef DefaultCheckFunctionName = DefaultCheckFunc->getName();
+        appendToUsed(M, {DefaultCheckFunc});
+        Comdat *C = M.getOrInsertComdat(DefaultCheckFunctionName);
+        C->setSelectionKind(Comdat::Any);
+        DefaultCheckFunc->setComdat(C);
+        // Add a linker option /alternatename to set the default implementation
+        // for the check function.
+        // https://devblogs.microsoft.com/oldnewthing/20200731-00/?p=104024
+        std::string AltOption = std::string("/alternatename:") +
+                                CheckFunctionName + "=" +
+                                DefaultCheckFunctionName.str();
+        llvm::Metadata *Ops[] = {llvm::MDString::get(Ctx, AltOption)};
+        MDTuple *N = MDNode::get(Ctx, Ops);
+        M.getOrInsertNamedMetadata("llvm.linker.options")->addOperand(N);
+      }
+    }
+    // FIXME: it would be nice to make CI a scheduling boundary, although in
+    // practice it does not matter much.
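An editorial aside before the call-insertion code that follows: the net effect of this pass on each function is easier to see in source form. A rough C++ equivalent, purely a sketch -- `JmcFlagForThisFile` and `instrumentedFunction` are illustrative stand-ins, since the real flag name is the hashed `__D032E919_file@any@c` style symbol described above, which contains '@' and cannot be spelled as a C++ identifier:

```cpp
// One flag per source file (deduplicated by its hashed name), placed in
// .msvcjmc (COFF) or .just.my.code (ELF); the debugger toggles it to switch
// Just My Code stepping on or off for that file.
static unsigned char JmcFlagForThisFile = 1;

// The default check is a no-op; MSVC reaches it via /alternatename, ELF via
// a weak definition, and a debugger-supplied strong definition replaces it.
extern "C" void __CheckForDebuggerJustMyCode(char *) {}

void instrumentedFunction() {
  __CheckForDebuggerJustMyCode(
      reinterpret_cast<char *>(&JmcFlagForThisFile)); // inserted at entry
  // ... original function body ...
}
```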
+ auto *CI = CallInst::Create(getCheckFunctionType(Ctx), CheckFunction, + {Flag}, "", &*F.begin()->getFirstInsertionPt()); + CI->addParamAttr(0, Attribute::NoUndef); + if (UseX86FastCall) { + CI->setCallingConv(CallingConv::X86_FastCall); + CI->addParamAttr(0, Attribute::InReg); + } + + Changed = true; + } + return Changed; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 0d3685d4141c..3192dcadb5f5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -23,20 +23,19 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt<bool> EnableTrapUnreachable("trap-unreachable", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable generating trap for unreachable")); +static cl::opt<bool> + EnableTrapUnreachable("trap-unreachable", cl::Hidden, + cl::desc("Enable generating trap for unreachable")); void LLVMTargetMachine::initAsmInfo() { MRI.reset(TheTarget.createMCRegInfo(getTargetTriple().str())); @@ -99,7 +98,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, } TargetTransformInfo -LLVMTargetMachine::getTargetTransformInfo(const Function &F) { +LLVMTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(BasicTTIImpl(this, F)); } @@ -164,22 +163,35 @@ Expected<std::unique_ptr<MCStreamer>> LLVMTargetMachine::createMCStreamer( // Create a code emitter if asked to show the encoding. std::unique_ptr<MCCodeEmitter> MCE; if (Options.MCOptions.ShowMCEncoding) - MCE.reset(getTarget().createMCCodeEmitter(MII, MRI, Context)); + MCE.reset(getTarget().createMCCodeEmitter(MII, Context)); + + bool UseDwarfDirectory = false; + switch (Options.MCOptions.MCUseDwarfDirectory) { + case MCTargetOptions::DisableDwarfDirectory: + UseDwarfDirectory = false; + break; + case MCTargetOptions::EnableDwarfDirectory: + UseDwarfDirectory = true; + break; + case MCTargetOptions::DefaultDwarfDirectory: + UseDwarfDirectory = MAI.enableDwarfFileDirectoryDefault(); + break; + } std::unique_ptr<MCAsmBackend> MAB( getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions)); auto FOut = std::make_unique<formatted_raw_ostream>(Out); MCStreamer *S = getTarget().createAsmStreamer( Context, std::move(FOut), Options.MCOptions.AsmVerbose, - Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE), - std::move(MAB), Options.MCOptions.ShowMCInst); + UseDwarfDirectory, InstPrinter, std::move(MCE), std::move(MAB), + Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, Context); if (!MCE) return make_error<StringError>("createMCCodeEmitter failed", inconvertibleErrorCode()); @@ -252,6 +264,9 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, "Cannot emit MC with limited codegen pipeline"); Ctx = &MMIWP->getMMI().getContext(); + // libunwind is unable to load compact unwind dynamically, so we must generate + // DWARF unwind info for the JIT. + Options.MCOptions.EmitDwarfUnwind = EmitDwarfUnwindType::Always; if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); @@ -259,8 +274,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, // emission fails. const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCRegisterInfo &MRI = *getMCRegisterInfo(); - MCCodeEmitter *MCE = - getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions); if (!MCE || !MAB) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index 63a0d0c1c43e..39b44b917d9e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -14,6 +14,7 @@ ///===---------------------------------------------------------------------===// #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/InitializePasses.h" using namespace llvm; @@ -87,7 +88,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const { OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>(); OwnedMBFI->calculate(*MF, MBPI, *MLI); - return *OwnedMBFI.get(); + return *OwnedMBFI; } bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction( diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 6af5f07d801a..30ca8bd871e8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -84,21 +84,18 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -106,27 +103,23 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include 
"llvm/Config/llvm-config.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GenericIteratedDominanceFrontier.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include <algorithm> #include <cassert> +#include <climits> #include <cstdint> #include <functional> -#include <limits.h> -#include <limits> #include <queue> #include <tuple> #include <utility> @@ -266,7 +259,7 @@ public: /// object fields to track variable locations as we step through the block. /// FIXME: could just examine mloctracker instead of passing in \p mlocs? void - loadInlocs(MachineBasicBlock &MBB, ValueIDNum *MLocs, + loadInlocs(MachineBasicBlock &MBB, ValueTable &MLocs, const SmallVectorImpl<std::pair<DebugVariable, DbgValue>> &VLocs, unsigned NumLocs) { ActiveMLocs.clear(); @@ -729,6 +722,20 @@ MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII, StackSlotIdxes.insert({{Size, Offs}, Idx}); } + // There may also be strange register class sizes (think x86 fp80s). + for (const TargetRegisterClass *RC : TRI.regclasses()) { + unsigned Size = TRI.getRegSizeInBits(*RC); + + // We might see special reserved values as sizes, and classes for other + // stuff the machine tries to model. If it's more than 512 bits, then it + // is very unlikely to be a register than can be spilt. + if (Size > 512) + continue; + + unsigned Idx = StackSlotIdxes.size(); + StackSlotIdxes.insert({{Size, 0}, Idx}); + } + for (auto &Idx : StackSlotIdxes) StackIdxesToPos[Idx.second] = Idx.first; @@ -863,19 +870,72 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc, // the variable is. if (Offset == 0) { const SpillLoc &Spill = SpillLocs[SpillID.id()]; - Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset, - Spill.SpillOffset); unsigned Base = Spill.SpillBase; MIB.addReg(Base); - MIB.addImm(0); - // Being on the stack makes this location indirect; if it was _already_ - // indirect though, we need to add extra indirection. See this test for - // a scenario where this happens: - // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll + // There are several ways we can dereference things, and several inputs + // to consider: + // * NRVO variables will appear with IsIndirect set, but should have + // nothing else in their DIExpressions, + // * Variables with DW_OP_stack_value in their expr already need an + // explicit dereference of the stack location, + // * Values that don't match the variable size need DW_OP_deref_size, + // * Everything else can just become a simple location expression. + + // We need to use deref_size whenever there's a mismatch between the + // size of value and the size of variable portion being read. + // Additionally, we should use it whenever dealing with stack_value + // fragments, to avoid the consumer having to determine the deref size + // from DW_OP_piece. 
+        bool UseDerefSize = false;
+        unsigned ValueSizeInBits = getLocSizeInBits(*MLoc);
+        unsigned DerefSizeInBytes = ValueSizeInBits / 8;
+        if (auto Fragment = Var.getFragment()) {
+          unsigned VariableSizeInBits = Fragment->SizeInBits;
+          if (VariableSizeInBits != ValueSizeInBits || Expr->isComplex())
+            UseDerefSize = true;
+        } else if (auto Size = Var.getVariable()->getSizeInBits()) {
+          if (*Size != ValueSizeInBits) {
+            UseDerefSize = true;
+          }
+        }
+
        if (Properties.Indirect) {
-          std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
-          Expr = DIExpression::append(Expr, Elts);
+          // This is something like an NRVO variable, where the pointer has
+          // been spilt to the stack, or a dbg.addr pointing at a coroutine
+          // frame field. It should end up being a memory location, with the
+          // pointer to the variable loaded off the stack with a deref. It
+          // can't be a DW_OP_stack_value expression.
+          assert(!Expr->isImplicit());
+          Expr = TRI.prependOffsetExpression(
+              Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+              Spill.SpillOffset);
+          MIB.addImm(0);
+        } else if (UseDerefSize) {
+          // We're loading a value off the stack that's not the same size as
+          // the variable. Add / subtract stack offset, explicitly deref with
+          // a size, and add DW_OP_stack_value if not already present.
+          SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size,
+                                          DerefSizeInBytes};
+          Expr = DIExpression::prependOpcodes(Expr, Ops, true);
+          unsigned Flags = DIExpression::StackValue | DIExpression::ApplyOffset;
+          Expr = TRI.prependOffsetExpression(Expr, Flags, Spill.SpillOffset);
+          MIB.addReg(0);
+        } else if (Expr->isComplex()) {
+          // A variable with no size ambiguity, but with extra elements in its
+          // expression. Manually dereference the stack location.
+          assert(Expr->isComplex());
+          Expr = TRI.prependOffsetExpression(
+              Expr, DIExpression::ApplyOffset | DIExpression::DerefAfter,
+              Spill.SpillOffset);
+          MIB.addReg(0);
+        } else {
+          // A plain value that has been spilt to the stack, with no further
+          // context. Request a location expression, marking the DBG_VALUE as
+          // IsIndirect.
+          Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+                                             Spill.SpillOffset);
+          MIB.addImm(0);
        }
      } else {
        // This is a stack location with a weird subregister offset: emit an undef
@@ -899,7 +959,7 @@
}

/// Default construct and initialize the pass.
-InstrRefBasedLDV::InstrRefBasedLDV() {}
+InstrRefBasedLDV::InstrRefBasedLDV() = default;

bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
  unsigned Reg = MTracker->LocIdxToLocID[L];
@@ -1022,8 +1082,8 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
}

bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
-                                             ValueIDNum **MLiveOuts,
-                                             ValueIDNum **MLiveIns) {
+                                             const ValueTable *MLiveOuts,
+                                             const ValueTable *MLiveIns) {
  if (!MI.isDebugRef())
    return false;
@@ -1091,15 +1151,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
      if (L)
        NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
    } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
-      assert(OpNo < TargetInstr.getNumOperands());
-      const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
-      // Today, this can only be a register.
-      assert(MO.isReg() && MO.isDef());
+      // Permit the debug-info to be completely wrong: identifying a
+      // nonexistent operand, or one that is not a register definition, means
+      // something unexpected happened during optimisation.
+      // Broken debug-info, however, shouldn't crash the compiler -- instead
+      // leave the variable value as None, which will make it appear
+      // "optimised out".
+      if (OpNo < TargetInstr.getNumOperands()) {
+        const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+        if (MO.isReg() && MO.isDef() && MO.getReg()) {
+          unsigned LocID = MTracker->getLocID(MO.getReg());
+          LocIdx L = MTracker->LocIDToLocIdx[LocID];
+          NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+        }
+      }

-      unsigned LocID = MTracker->getLocID(MO.getReg());
-      LocIdx L = MTracker->LocIDToLocIdx[LocID];
-      NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+      if (!NewID) {
+        LLVM_DEBUG(
+            { dbgs() << "Seen instruction reference to illegal operand\n"; });
+      }
    }
    // else: NewID is left as None.
  } else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
@@ -1249,7 +1319,16 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
  const MachineOperand &MO = MI.getOperand(0);
  unsigned InstrNum = MI.getOperand(1).getImm();

-  if (MO.isReg()) {
+  auto EmitBadPHI = [this, &MI, InstrNum](void) -> bool {
+    // Helper lambda to do any accounting when we fail to find a location for
+    // a DBG_PHI. This can happen if DBG_PHIs are malformed, or refer to a
+    // dead stack slot, for example.
+    // Record a DebugPHIRecord with an empty value + location.
+    DebugPHINumToValue.push_back({InstrNum, MI.getParent(), None, None});
+    return true;
+  };
+
+  if (MO.isReg() && MO.getReg()) {
    // The value is whatever's currently in the register. Read and record it,
    // to be analysed later.
    Register Reg = MO.getReg();
@@ -1261,15 +1340,14 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
    // Ensure this register is tracked.
    for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
      MTracker->lookupOrTrackRegister(*RAI);
-  } else {
+  } else if (MO.isFI()) {
    // The value is whatever's in this stack slot.
-    assert(MO.isFI());
    unsigned FI = MO.getIndex();

    // If the stack slot is dead, then this was optimized away.
    // FIXME: stack slot colouring should account for slots that get merged.
    if (MFI->isDeadObjectIndex(FI))
-      return true;
+      return EmitBadPHI();

    // Identify this spill slot, ensure it's tracked.
    Register Base;
@@ -1280,43 +1358,27 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
    // We might be able to find a value, but have chosen not to, to avoid
    // tracking too much stack information.
    if (!SpillNo)
-      return true;
+      return EmitBadPHI();

-    // Problem: what value should we extract from the stack? LLVM does not
-    // record what size the last store to the slot was, and it would become
-    // sketchy after stack slot colouring anyway. Take a look at what values
-    // are stored on the stack, and pick the largest one that wasn't def'd
-    // by a spill (i.e., the value most likely to have been def'd in a register
-    // and then spilt.
-    std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
-    Optional<ValueIDNum> Result = None;
-    Optional<LocIdx> SpillLoc = None;
-    for (unsigned CS : CandidateSizes) {
-      unsigned SpillID = MTracker->getLocID(*SpillNo, {CS, 0});
-      SpillLoc = MTracker->getSpillMLoc(SpillID);
-      ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
-      // If this value was defined in it's own position, then it was probably
-      // an aliasing index of a small value that was spilt.
-      if (Val.getLoc() != SpillLoc->asU64()) {
-        Result = Val;
-        break;
-      }
-    }

+    // Any stack location DBG_PHI should have an associated bit-size.
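Spelling out the contract the following assert enforces: a stack-slot DBG_PHI now carries a third operand holding the bit width of the value it reads; the producer side is the `Builder.addImm(regSizeInBits)` hunk in LiveDebugVariables.cpp later in this diff. A hypothetical reader (illustrative only, matching the operand layout used here):

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include <cassert>

// Operand 0: register or frame index; operand 1: debug instruction number;
// operand 2: size in bits of the value stored in the slot.
static unsigned readStackDbgPhiSizeInBits(const llvm::MachineInstr &MI) {
  assert(MI.getOpcode() == llvm::TargetOpcode::DBG_PHI);
  assert(MI.getNumOperands() == 3 && "stack DBG_PHI carries a size operand");
  return static_cast<unsigned>(MI.getOperand(2).getImm());
}
```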
+      assert(MI.getNumOperands() == 3 && "Stack DBG_PHI with no size?");
+      unsigned slotBitSize = MI.getOperand(2).getImm();

-    // If we didn't find anything, we're probably looking at a PHI, or a memory
-    // store folded into an instruction. FIXME: Take a guess that's it's 64
-    // bits. This isn't ideal, but tracking the size that the spill is
-    // "supposed" to be is more complex, and benefits a small number of
-    // locations.
-    if (!Result) {
-      unsigned SpillID = MTracker->getLocID(*SpillNo, {64, 0});
-      SpillLoc = MTracker->getSpillMLoc(SpillID);
-      Result = MTracker->readMLoc(*SpillLoc);
-    }
+      unsigned SpillID = MTracker->getLocID(*SpillNo, {slotBitSize, 0});
+      LocIdx SpillLoc = MTracker->getSpillMLoc(SpillID);
+      ValueIDNum Result = MTracker->readMLoc(SpillLoc);

    // Record this DBG_PHI for later analysis.
-    auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
+    auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), Result, SpillLoc});
    DebugPHINumToValue.push_back(DbgPHI);
+  } else {
+    // Else: if the operand is neither a legal register nor a stack slot, then
+    // we're being fed illegal debug-info. Record an empty PHI, so that any
+    // debug users trying to read this number will be put off trying to
+    // interpret the value.
+    LLVM_DEBUG(
+        { dbgs() << "Seen DBG_PHI with unrecognised operand format\n"; });
+    return EmitBadPHI();
  }

  return true;
@@ -1614,11 +1676,6 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
      LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
      auto ReadValue = MTracker->readMLoc(SrcIdx);
      MTracker->setReg(DestReg, ReadValue);
-
-      if (TTracker) {
-        LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
-        TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
-      }
    };

    for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
@@ -1755,8 +1812,8 @@ void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
  AllSeenFragments.insert(ThisFragment);
}

-void InstrRefBasedLDV::process(MachineInstr &MI, ValueIDNum **MLiveOuts,
-                               ValueIDNum **MLiveIns) {
+void InstrRefBasedLDV::process(MachineInstr &MI, const ValueTable *MLiveOuts,
+                               const ValueTable *MLiveIns) {
  // Try to interpret an MI as a debug or transfer instruction. Only if it's
  // none of these should we interpret it's register defs as new value
  // definitions.
@@ -1806,7 +1863,10 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
    // Step through each instruction in this block.
    for (auto &MI : MBB) {
-      process(MI);
+      // Pass in an empty unique_ptr for the value tables when accumulating the
+      // machine transfer function.
+      process(MI, nullptr, nullptr);
+
+      // Also accumulate fragment map.
if (MI.isDebugValue() || MI.isDebugRef()) accumulateFragmentMap(MI); @@ -1895,7 +1955,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction( bool InstrRefBasedLDV::mlocJoin( MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs) { + FuncValueTable &OutLocs, ValueTable &InLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; @@ -1996,7 +2056,7 @@ void InstrRefBasedLDV::findStackIndexInterference( void InstrRefBasedLDV::placeMLocPHIs( MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, - ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) { + FuncValueTable &MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) { SmallVector<unsigned, 4> StackUnits; findStackIndexInterference(StackUnits); @@ -2125,7 +2185,7 @@ void InstrRefBasedLDV::placeMLocPHIs( } void InstrRefBasedLDV::buildMLocValueMap( - MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs, + MachineFunction &MF, FuncValueTable &MInLocs, FuncValueTable &MOutLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) { std::priority_queue<unsigned int, std::vector<unsigned int>, std::greater<unsigned int>> @@ -2267,7 +2327,7 @@ void InstrRefBasedLDV::BlockPHIPlacement( Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc( const MachineBasicBlock &MBB, const DebugVariable &Var, - const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) { // Collect a set of locations from predecessor where its live-out value can // be found. @@ -2535,7 +2595,7 @@ void InstrRefBasedLDV::getBlocksForScope( void InstrRefBasedLDV::buildVLocValueMap( const DILocation *DILoc, const SmallSet<DebugVariable, 4> &VarsWeCareAbout, SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output, - ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs) { // This method is much like buildMLocValueMap: but focuses on a single // LexicalScope at a time. Pick out a set of blocks and variables that are @@ -2920,7 +2980,7 @@ void InstrRefBasedLDV::makeDepthFirstEjectionMap( bool InstrRefBasedLDV::depthFirstVLocAndEmit( unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation, const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToAssignBlocks, - LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, DenseMap<DebugVariable, unsigned> &AllVarsNumbering, const TargetPassConfig &TPC) { @@ -2929,15 +2989,8 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( VTracker = nullptr; // No scopes? No variable locations. - if (!LS.getCurrentFunctionScope()) { - // FIXME: this is a sticking plaster to prevent a memory leak, these - // pointers will be automagically freed by being unique pointers, shortly. - for (unsigned int I = 0; I < MaxNumBlocks; ++I) { - delete[] MInLocs[I]; - delete[] MOutLocs[I]; - } + if (!LS.getCurrentFunctionScope()) return false; - } // Build map from block number to the last scope that uses the block. 
SmallVector<unsigned, 16> EjectionMap; @@ -2961,17 +3014,14 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( CurBB = BBNum; CurInst = 1; for (auto &MI : MBB) { - process(MI, MOutLocs, MInLocs); + process(MI, MOutLocs.get(), MInLocs.get()); TTracker->checkInstForNewValues(CurInst, MI.getIterator()); ++CurInst; } // Free machine-location tables for this block. - delete[] MInLocs[BBNum]; - delete[] MOutLocs[BBNum]; - // Make ourselves brittle to use-after-free errors. - MInLocs[BBNum] = nullptr; - MOutLocs[BBNum] = nullptr; + MInLocs[BBNum].reset(); + MOutLocs[BBNum].reset(); // We don't need live-in variable values for this block either. Output[BBNum].clear(); AllTheVLocs[BBNum].clear(); @@ -3039,16 +3089,6 @@ bool InstrRefBasedLDV::depthFirstVLocAndEmit( if (MOutLocs[MBB->getNumber()]) EjectBlock(*MBB); - // Finally, there might have been gaps in the block numbering, from dead - // blocks being deleted or folded. In those scenarios, we might allocate a - // block-table that's never ejected, meaning we have to free it at the end. - for (unsigned int I = 0; I < MaxNumBlocks; ++I) { - if (MInLocs[I]) { - delete[] MInLocs[I]; - delete[] MOutLocs[I]; - } - } - return emitTransfers(AllVarsNumbering); } @@ -3135,24 +3175,24 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, assert(MaxNumBlocks >= 0); ++MaxNumBlocks; + initialSetup(MF); + MLocTransfer.resize(MaxNumBlocks); vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr)); SavedLiveIns.resize(MaxNumBlocks); - initialSetup(MF); - produceMLocTransferFunction(MF, MLocTransfer, MaxNumBlocks); // Allocate and initialize two array-of-arrays for the live-in and live-out // machine values. The outer dimension is the block number; while the inner // dimension is a LocIdx from MLocTracker. - ValueIDNum **MOutLocs = new ValueIDNum *[MaxNumBlocks]; - ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks]; + FuncValueTable MOutLocs = std::make_unique<ValueTable[]>(MaxNumBlocks); + FuncValueTable MInLocs = std::make_unique<ValueTable[]>(MaxNumBlocks); unsigned NumLocs = MTracker->getNumLocs(); for (int i = 0; i < MaxNumBlocks; ++i) { // These all auto-initialize to ValueIDNum::EmptyValue - MOutLocs[i] = new ValueIDNum[NumLocs]; - MInLocs[i] = new ValueIDNum[NumLocs]; + MOutLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs); + MInLocs[i] = std::make_unique<ValueIDNum[]>(NumLocs); } // Solve the machine value dataflow problem using the MLocTransfer function, @@ -3165,7 +3205,10 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, // either live-through machine values, or PHIs. for (auto &DBG_PHI : DebugPHINumToValue) { // Identify unresolved block-live-ins. - ValueIDNum &Num = DBG_PHI.ValueRead; + if (!DBG_PHI.ValueRead) + continue; + + ValueIDNum &Num = *DBG_PHI.ValueRead; if (!Num.isPHI()) continue; @@ -3186,7 +3229,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, MTracker->loadFromArray(MInLocs[CurBB], CurBB); CurInst = 1; for (auto &MI : MBB) { - process(MI, MOutLocs, MInLocs); + process(MI, MOutLocs.get(), MInLocs.get()); ++CurInst; } MTracker->reset(); @@ -3241,12 +3284,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, << " has " << MaxNumBlocks << " basic blocks and " << VarAssignCount << " variable assignments, exceeding limits.\n"); - - // Perform memory cleanup that emitLocations would do otherwise. 
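For context on the deletions just below and the `std::make_unique<ValueTable[]>` allocations above: table ownership now lives in unique_ptr arrays, so per-block tables are released early with `reset()` at ejection time and trailing `delete[]` sweeps become unnecessary. A self-contained sketch of the pattern, with `int` standing in for `ValueIDNum`:

```cpp
#include <memory>

using Elem = int; // stand-in for ValueIDNum
using Table = std::unique_ptr<Elem[]>;
using FuncTable = std::unique_ptr<Table[]>;

FuncTable makeTables(unsigned NumBlocks, unsigned NumLocs) {
  FuncTable T = std::make_unique<Table[]>(NumBlocks);
  for (unsigned I = 0; I != NumBlocks; ++I)
    T[I] = std::make_unique<Elem[]>(NumLocs); // value-initialized to zero
  // Callers may free one block early via T[B].reset(); everything else is
  // reclaimed automatically when T goes out of scope.
  return T;
}
```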
- for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) { - delete[] MOutLocs[Idx]; - delete[] MInLocs[Idx]; - } } else { // Optionally, solve the variable value problem and emit to blocks by using // a lexical-scope-depth search. It should be functionally identical to @@ -3256,10 +3293,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, SavedLiveIns, MOutLocs, MInLocs, vlocs, MF, AllVarsNumbering, *TPC); } - // Elements of these arrays will be deleted by emitLocations. - delete[] MOutLocs; - delete[] MInLocs; - delete MTracker; delete TTracker; MTracker = nullptr; @@ -3376,9 +3409,10 @@ public: /// Machine location where any PHI must occur. LocIdx Loc; /// Table of live-in machine value numbers for blocks / locations. - ValueIDNum **MLiveIns; + const ValueTable *MLiveIns; - LDVSSAUpdater(LocIdx L, ValueIDNum **MLiveIns) : Loc(L), MLiveIns(MLiveIns) {} + LDVSSAUpdater(LocIdx L, const ValueTable *MLiveIns) + : Loc(L), MLiveIns(MLiveIns) {} void reset() { for (auto &Block : BlockMap) @@ -3535,11 +3569,13 @@ public: } // end namespace llvm -Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, - MachineInstr &Here, - uint64_t InstrNum) { +Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs( + MachineFunction &MF, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { + assert(MLiveOuts && MLiveIns && + "Tried to resolve DBG_PHI before location " + "tables allocated?"); + // This function will be called twice per DBG_INSTR_REF, and might end up // computing lots of SSA information: memoize it. auto SeenDbgPHIIt = SeenDbgPHIs.find(&Here); @@ -3553,8 +3589,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIs(MachineFunction &MF, } Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( - MachineFunction &MF, ValueIDNum **MLiveOuts, ValueIDNum **MLiveIns, - MachineInstr &Here, uint64_t InstrNum) { + MachineFunction &MF, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum) { // Pick out records of DBG_PHI instructions that have been observed. If there // are none, then we cannot compute a value number. auto RangePair = std::equal_range(DebugPHINumToValue.begin(), @@ -3566,17 +3602,24 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( if (LowerIt == UpperIt) return None; + // If any DBG_PHIs referred to a location we didn't understand, don't try to + // compute a value. There might be scenarios where we could recover a value + // for some range of DBG_INSTR_REFs, but at this point we can have high + // confidence that we've seen a bug. + auto DBGPHIRange = make_range(LowerIt, UpperIt); + for (const DebugPHIRecord &DBG_PHI : DBGPHIRange) + if (!DBG_PHI.ValueRead) + return None; + // If there's only one DBG_PHI, then that is our value number. if (std::distance(LowerIt, UpperIt) == 1) - return LowerIt->ValueRead; - - auto DBGPHIRange = make_range(LowerIt, UpperIt); + return *LowerIt->ValueRead; // Pick out the location (physreg, slot) where any PHIs must occur. It's // technically possible for us to merge values in different registers in each // block, but highly unlikely that LLVM will generate such code after register // allocation. - LocIdx Loc = LowerIt->ReadLoc; + LocIdx Loc = *LowerIt->ReadLoc; // We have several DBG_PHIs, and a use position (the Here inst). All each // DBG_PHI does is identify a value at a program position. 
We can treat each @@ -3595,7 +3638,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( // for the SSAUpdater. for (const auto &DBG_PHI : DBGPHIRange) { LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); - const ValueIDNum &Num = DBG_PHI.ValueRead; + const ValueIDNum &Num = *DBG_PHI.ValueRead; AvailableValues.insert(std::make_pair(Block, Num.asU64())); } @@ -3629,7 +3672,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( // Define all the input DBG_PHI values in ValidatedValues. for (const auto &DBG_PHI : DBGPHIRange) { LDVSSABlock *Block = Updater.getSSALDVBlock(DBG_PHI.MBB); - const ValueIDNum &Num = DBG_PHI.ValueRead; + const ValueIDNum &Num = *DBG_PHI.ValueRead; ValidatedValues.insert(std::make_pair(Block, Num)); } @@ -3654,7 +3697,7 @@ Optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl( return None; ValueIDNum ValueToCheck; - ValueIDNum *BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; + const ValueTable &BlockLiveOuts = MLiveOuts[PHIIt.first->BB.getNumber()]; auto VVal = ValidatedValues.find(PHIIt.first); if (VVal == ValidatedValues.end()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h index d778561db471..70aae47c8bdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -10,17 +10,14 @@ #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/TargetFrameLowering.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "LiveDebugValues.h" @@ -171,6 +168,13 @@ public: static ValueIDNum TombstoneValue; }; +/// Type for a table of values in a block. +using ValueTable = std::unique_ptr<ValueIDNum[]>; + +/// Type for a table-of-table-of-values, i.e., the collection of either +/// live-in or live-out values for each block in the function. +using FuncValueTable = std::unique_ptr<ValueTable[]>; + /// Thin wrapper around an integer -- designed to give more type safety to /// spill location numbers. class SpillLocationNo { @@ -192,7 +196,7 @@ public: }; /// Meta qualifiers for a value. Pair of whatever expression is used to qualify -/// the the value, and Boolean of whether or not it's indirect. +/// the value, and Boolean of whether or not it's indirect. class DbgValueProperties { public: DbgValueProperties(const DIExpression *DIExpr, bool Indirect) @@ -507,7 +511,7 @@ public: /// Load values for each location from array of ValueIDNums. Take current /// bbnum just in case we read a value from a hitherto untouched register. - void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) { + void loadFromArray(ValueTable &Locs, unsigned NewCurBB) { CurBB = NewCurBB; // Iterate over all tracked locations, and load each locations live-in // value into our local index. @@ -629,6 +633,19 @@ public: /// Return true if Idx is a spill machine location. 
bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; } + /// How large is this location (aka, how wide is a value defined there?). + unsigned getLocSizeInBits(LocIdx L) const { + unsigned ID = LocIdxToLocID[L]; + if (!isSpill(L)) { + return TRI.getRegSizeInBits(Register(ID), MF.getRegInfo()); + } else { + // The slot location on the stack is uninteresting, we care about the + // position of the value within the slot (which comes with a size). + StackSlotPos Pos = locIDToSpillIdx(ID); + return Pos.first; + } + } + MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); } MLocIterator end() { @@ -851,10 +868,16 @@ private: /// Record of where we observed a DBG_PHI instruction. class DebugPHIRecord { public: - uint64_t InstrNum; ///< Instruction number of this DBG_PHI. - MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred. - ValueIDNum ValueRead; ///< The value number read by the DBG_PHI. - LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads. + /// Instruction number of this DBG_PHI. + uint64_t InstrNum; + /// Block where DBG_PHI occurred. + MachineBasicBlock *MBB; + /// The value number read by the DBG_PHI -- or None if it didn't refer to + /// a value. + Optional<ValueIDNum> ValueRead; + /// Register/Stack location the DBG_PHI reads -- or None if it referred to + /// something unexpected. + Optional<LocIdx> ReadLoc; operator unsigned() const { return InstrNum; } }; @@ -909,8 +932,8 @@ private: extractSpillBaseRegAndOffset(const MachineInstr &MI); /// Observe a single instruction while stepping through a block. - void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr, - ValueIDNum **MLiveIns = nullptr); + void process(MachineInstr &MI, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns); /// Examines whether \p MI is a DBG_VALUE and notifies trackers. /// \returns true if MI was recognized and processed. @@ -918,8 +941,8 @@ private: /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers. /// \returns true if MI was recognized and processed. - bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns); + bool transferDebugInstrRef(MachineInstr &MI, const ValueTable *MLiveOuts, + const ValueTable *MLiveIns); /// Stores value-information about where this PHI occurred, and what /// instruction number is associated with it. @@ -951,13 +974,13 @@ private: /// \p InstrNum Debug instruction number defined by DBG_PHI instructions. /// \returns The machine value number at position Here, or None. Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, MachineInstr &Here, - uint64_t InstrNum); + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, + MachineInstr &Here, uint64_t InstrNum); Optional<ValueIDNum> resolveDbgPHIsImpl(MachineFunction &MF, - ValueIDNum **MLiveOuts, - ValueIDNum **MLiveIns, + const ValueTable *MLiveOuts, + const ValueTable *MLiveIns, MachineInstr &Here, uint64_t InstrNum); @@ -975,8 +998,8 @@ private: /// live-out arrays to the (initialized to zero) multidimensional arrays in /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block /// number, the inner by LocIdx. - void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs, - ValueIDNum **MOutLocs, + void buildMLocValueMap(MachineFunction &MF, FuncValueTable &MInLocs, + FuncValueTable &MOutLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer); /// Examine the stack indexes (i.e. 
offsets within the stack) to find the @@ -987,7 +1010,7 @@ private: /// the IDF of each register. void placeMLocPHIs(MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks, - ValueIDNum **MInLocs, + FuncValueTable &MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer); /// Propagate variable values to blocks in the common case where there's @@ -1018,7 +1041,7 @@ private: /// is true, revisiting this block is necessary. bool mlocJoin(MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs); + FuncValueTable &OutLocs, ValueTable &InLocs); /// Produce a set of blocks that are in the current lexical scope. This means /// those blocks that contain instructions "in" the scope, blocks where @@ -1046,11 +1069,11 @@ private: /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks /// locations through. void buildVLocValueMap(const DILocation *DILoc, - const SmallSet<DebugVariable, 4> &VarsWeCareAbout, - SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, - LiveInsT &Output, ValueIDNum **MOutLocs, - ValueIDNum **MInLocs, - SmallVectorImpl<VLocTracker> &AllTheVLocs); + const SmallSet<DebugVariable, 4> &VarsWeCareAbout, + SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, + LiveInsT &Output, FuncValueTable &MOutLocs, + FuncValueTable &MInLocs, + SmallVectorImpl<VLocTracker> &AllTheVLocs); /// Attempt to eliminate un-necessary PHIs on entry to a block. Examines the /// live-in values coming from predecessors live-outs, and replaces any PHIs @@ -1068,7 +1091,7 @@ private: /// \returns Value ID of a machine PHI if an appropriate one is available. Optional<ValueIDNum> pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var, - const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs, + const LiveIdxT &LiveOuts, FuncValueTable &MOutLocs, const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders); /// Take collections of DBG_VALUE instructions stored in TTracker, and @@ -1098,7 +1121,7 @@ private: bool depthFirstVLocAndEmit( unsigned MaxNumBlocks, const ScopeToDILocT &ScopeToDILocation, const ScopeToVarsT &ScopeToVars, ScopeToAssignBlocksT &ScopeToBlocks, - LiveInsT &Output, ValueIDNum **MOutLocs, ValueIDNum **MInLocs, + LiveInsT &Output, FuncValueTable &MOutLocs, FuncValueTable &MInLocs, SmallVectorImpl<VLocTracker> &AllTheVLocs, MachineFunction &MF, DenseMap<DebugVariable, unsigned> &AllVarsNumbering, const TargetPassConfig &TPC); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index 40770b15aa35..141008ac2296 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -8,14 +8,16 @@ #include "LiveDebugValues.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetMachine.h" /// \file LiveDebugValues.cpp /// @@ -65,7 +67,7 @@ public: static char ID; LiveDebugValues(); - ~LiveDebugValues() {} + ~LiveDebugValues() = default; /// Calculate the liveness 
information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h index 8f0b2ec3e1fc..6cc1685c0022 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/ADT/Triple.h" - namespace llvm { +class MachineDominatorTree; +class MachineFunction; +class TargetPassConfig; +class Triple; // Inline namespace for types / symbols shared between different // LiveDebugValues implementations. @@ -28,7 +27,7 @@ public: virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, TargetPassConfig *TPC, unsigned InputBBLimit, unsigned InputDbgValLimit) = 0; - virtual ~LDVImpl() {} + virtual ~LDVImpl() = default; }; } // namespace SharedLiveDebugValues diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp index 42a0967bce3f..24c00b8a10ec 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -118,18 +118,15 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/UniqueVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -137,16 +134,11 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" @@ -922,14 +914,14 @@ private: std::unique_ptr<VarLocSet> &VLS = Locs[MBB]; if (!VLS) VLS = std::make_unique<VarLocSet>(Alloc); - return *VLS.get(); + return *VLS; } const VarLocSet &getVarLocsInMBB(const MachineBasicBlock *MBB, const VarLocInMBB &Locs) const { auto It = Locs.find(MBB); assert(It != Locs.end() && "MBB not in map"); - return *It->second.get(); + return *It->second; } /// Tests whether this instruction is a spill to a stack location. 
@@ -1035,9 +1027,9 @@ public: // Implementation //===----------------------------------------------------------------------===// -VarLocBasedLDV::VarLocBasedLDV() { } +VarLocBasedLDV::VarLocBasedLDV() = default; -VarLocBasedLDV::~VarLocBasedLDV() { } +VarLocBasedLDV::~VarLocBasedLDV() = default; /// Erase a variable from the set of open ranges, and additionally erase any /// fragments that may overlap it. If the VarLoc is a backup location, erase @@ -1948,7 +1940,7 @@ bool VarLocBasedLDV::join( // Just copy over the Out locs to incoming locs for the first visited // predecessor, and for all other predecessors join the Out locs. - VarLocSet &OutLocVLS = *OL->second.get(); + VarLocSet &OutLocVLS = *OL->second; if (!NumVisited) InLocsT = OutLocVLS; else @@ -2007,7 +1999,7 @@ void VarLocBasedLDV::flushPendingLocs(VarLocInMBB &PendingInLocs, for (auto &Iter : PendingInLocs) { // Map is keyed on a constant pointer, unwrap it so we can insert insts. auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first); - VarLocSet &Pending = *Iter.second.get(); + VarLocSet &Pending = *Iter.second; SmallVector<VarLoc, 32> VarLocs; collectAllVarLocs(VarLocs, Pending, VarLocIDs); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp index 6d806135240e..35cf25330186 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -38,11 +39,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -50,15 +49,12 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <iterator> @@ -976,7 +972,7 @@ void UserValue::extendDef( if (Segment->end < Stop) { Stop = Segment->end; Kills = {Stop, {LII.first}}; - } else if (Segment->end == Stop && Kills.hasValue()) { + } else if (Segment->end == Stop && Kills) { // If multiple locations end at the same place, track all of them in // Kills. Kills->second.push_back(LII.first); @@ -1854,16 +1850,33 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { const TargetRegisterClass *TRC = MRI.getRegClass(Reg); unsigned SpillSize, SpillOffset; - // Test whether this location is legal with the given subreg. + unsigned regSizeInBits = TRI->getRegSizeInBits(*TRC); + if (SubReg) + regSizeInBits = TRI->getSubRegIdxSize(SubReg); + + // Test whether this location is legal with the given subreg. 
If the
+    // subregister has a nonzero offset, drop this location; it's too complex
+    // to describe. (TODO: future work).
    bool Success = TII->getStackSlotRange(TRC, SubReg, SpillSize, SpillOffset,
                                          *MF);
-    if (Success) {
+    if (Success && SpillOffset == 0) {
      auto Builder = BuildMI(*OrigMBB, OrigMBB->begin(), DebugLoc(),
                             TII->get(TargetOpcode::DBG_PHI));
      Builder.addFrameIndex(VRM->getStackSlot(Reg));
      Builder.addImm(InstNum);
+      // Record how large the original value is. The stack slot might be
+      // merged and altered during optimisation, but we will want to know how
+      // large the value is at this DBG_PHI.
+      Builder.addImm(regSizeInBits);
+    }
+
+    LLVM_DEBUG(
+      if (SpillOffset != 0) {
+        dbgs() << "DBG_PHI for Vreg " << Reg << " subreg " << SubReg
+               << " has nonzero offset\n";
+      }
+    );
    }

    // If there was no mapping for a value ID, it's optimized out. Create no
    // DBG_PHI, and any variables using this value will become optimized out.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
index 9ded0fb6ae0a..9378aaeb181c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveInterval.cpp
@@ -348,23 +348,8 @@ private:
//===----------------------------------------------------------------------===//

LiveRange::iterator LiveRange::find(SlotIndex Pos) {
-  // This algorithm is basically std::upper_bound.
-  // Unfortunately, std::upper_bound cannot be used with mixed types until we
-  // adopt C++0x. Many libraries can do it, but not all.
-  if (empty() || Pos >= endIndex())
-    return end();
-  iterator I = begin();
-  size_t Len = size();
-  do {
-    size_t Mid = Len >> 1;
-    if (Pos < I[Mid].end) {
-      Len = Mid;
-    } else {
-      I += Mid + 1;
-      Len -= Mid + 1;
-    }
-  } while (Len);
-  return I;
+  return llvm::partition_point(*this,
+                               [&](const Segment &X) { return X.end <= Pos; });
}

VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
index 2756086cb8b1..3176d73b35f6 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalCalc.cpp
@@ -11,13 +11,9 @@
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/LiveIntervalCalc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -25,12 +21,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
-#include <iterator>
-#include <tuple>
-#include <utility>

using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 50b31e1eb247..11a4ecf0bef9 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -26,7 +26,8 @@ using namespace llvm;

#define DEBUG_TYPE "regalloc"

//
Merge a LiveInterval's segments. Guarantee no overlaps. -void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { +void LiveIntervalUnion::unify(const LiveInterval &VirtReg, + const LiveRange &Range) { if (Range.empty()) return; ++Tag; @@ -53,7 +54,8 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { } // Remove a live virtual register's segments from this union. -void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) { +void LiveIntervalUnion::extract(const LiveInterval &VirtReg, + const LiveRange &Range) { if (Range.empty()) return; ++Tag; @@ -99,7 +101,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { } #endif //!NDEBUG -LiveInterval *LiveIntervalUnion::getOneVReg() const { +const LiveInterval *LiveIntervalUnion::getOneVReg() const { if (empty()) return nullptr; for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { @@ -111,7 +113,8 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. -bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { +bool LiveIntervalUnion::Query::isSeenInterference( + const LiveInterval *VirtReg) const { return is_contained(InterferingVRegs, VirtReg); } @@ -147,14 +150,14 @@ LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) { } LiveRange::const_iterator LREnd = LR->end(); - LiveInterval *RecentReg = nullptr; + const LiveInterval *RecentReg = nullptr; while (LiveUnionI.valid()) { assert(LRI != LREnd && "Reached end of LR"); // Check for overlapping interference. while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) { // This is an overlap, record the interfering register. - LiveInterval *VReg = LiveUnionI.value(); + const LiveInterval *VReg = LiveUnionI.value(); if (VReg != RecentReg && !isSeenInterference(VReg)) { RecentReg = VReg; InterferingVRegs.push_back(VReg); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp index 9571afa434c1..7d825a8bf853 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp @@ -33,22 +33,20 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Statepoint.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/StackMaps.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -149,7 +147,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { getRegUnit(i); } LLVM_DEBUG(dump()); - return true; + return false; } void LiveIntervals::print(raw_ostream &OS, const Module* ) const { @@ -500,7 +498,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Create new live ranges with only minimal live segments per def. 
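An aside on the `LiveRange::find` rewrite a few hunks above, before `shrinkToUses` continues below: `llvm::partition_point` (a thin wrapper over `std::partition_point`) returns the first element for which the predicate is false -- here, the first segment whose `end` lies beyond `Pos` -- which is exactly what the removed hand-rolled binary search computed. A self-contained illustration using the standard algorithm:

```cpp
#include <algorithm>
#include <vector>

struct Seg { int Start, End; }; // segments sorted by End, as in a LiveRange

const Seg *findSeg(const std::vector<Seg> &Segs, int Pos) {
  auto I = std::partition_point(Segs.begin(), Segs.end(),
                                [&](const Seg &S) { return S.End <= Pos; });
  return I == Segs.end() ? nullptr : &*I; // first segment ending after Pos
}
```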
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
index 9571afa434c1..7d825a8bf853 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -33,22 +33,20 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Statepoint.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/StackMaps.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -149,7 +147,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
       getRegUnit(i);
   }
   LLVM_DEBUG(dump());
-  return true;
+  return false;
 }
 
 void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
@@ -500,7 +498,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
 
   // Create new live ranges with only minimal live segments per def.
   LiveRange NewLR;
-  createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end()));
+  createSegmentsForValues(NewLR, li->vnis());
   extendSegmentsToUses(NewLR, WorkList, Reg, LaneBitmask::getNone());
 
   // Move the trimmed segments back.
@@ -604,7 +602,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, Register Reg) {
 
   // Create new live ranges with only minimal live segments per def.
   LiveRange NewLR;
-  createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end()));
+  createSegmentsForValues(NewLR, SR.vnis());
   extendSegmentsToUses(NewLR, WorkList, Reg, SR.LaneMask);
 
   // Move the trimmed ranges back.
@@ -913,11 +911,11 @@ static bool hasLiveThroughUse(const MachineInstr *MI, Register Reg) {
   return false;
 }
 
-bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+bool LiveIntervals::checkRegMaskInterference(const LiveInterval &LI,
                                              BitVector &UsableRegs) {
   if (LI.empty())
     return false;
-  LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+  LiveInterval::const_iterator LiveI = LI.begin(), LiveE = LI.end();
 
   // Use smaller arrays for local live ranges.
   ArrayRef<SlotIndex> Slots;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
index 3ef28042acb0..26f6e1ede1ad 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -20,11 +20,9 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/MC/LaneBitmask.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 05768140cbdf..58eb4110f153 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -371,7 +371,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
       const MachineOperand &MO = MI->getOperand(i-1);
       if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
         continue;
-      MI->RemoveOperand(i-1);
+      MI->removeOperand(i-1);
     }
     LLVM_DEBUG(dbgs() << "Converted physregs to:\t" << *MI);
   } else {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 054f4370b609..8e56985246db 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,7 +23,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 4c0172a930b5..6ca7f00a7885 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -78,13 +78,13 @@ void LiveRegMatrix::releaseMemory() {
 
 template <typename Callable>
 static bool foreachUnit(const TargetRegisterInfo *TRI,
-                        LiveInterval &VRegInterval, MCRegister PhysReg,
+                        const LiveInterval &VRegInterval, MCRegister PhysReg,
                         Callable Func) {
   if (VRegInterval.hasSubRanges()) {
     for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
       unsigned Unit = (*Units).first;
       LaneBitmask Mask = (*Units).second;
-      for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
+      for (const LiveInterval::SubRange &S : VRegInterval.subranges()) {
         if ((S.LaneMask & Mask).any()) {
           if (Func(Unit, S))
             return true;
@@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
   return false;
 }
 
-void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
+void LiveRegMatrix::assign(const LiveInterval &VirtReg, MCRegister PhysReg) {
   LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to "
                     << printReg(PhysReg, TRI) << ':');
   assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment");
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) {
   LLVM_DEBUG(dbgs() << '\n');
 }
 
-void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
+void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
   Register PhysReg = VRM->getPhys(VirtReg.reg());
   LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg(), TRI)
                     << " from " << printReg(PhysReg, TRI) << ':');
@@ -143,7 +143,7 @@ bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
   return false;
 }
 
-bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegMaskInterference(const LiveInterval &VirtReg,
                                              MCRegister PhysReg) {
   // Check if the cached information is valid.
   // The same BitVector can be reused for all PhysRegs.
@@ -161,7 +161,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg,
   return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg));
 }
 
-bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
+bool LiveRegMatrix::checkRegUnitInterference(const LiveInterval &VirtReg,
                                              MCRegister PhysReg) {
   if (VirtReg.empty())
     return false;
@@ -183,7 +183,8 @@ LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
 }
 
 LiveRegMatrix::InterferenceKind
-LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) {
+LiveRegMatrix::checkInterference(const LiveInterval &VirtReg,
+                                 MCRegister PhysReg) {
   if (VirtReg.empty())
     return IK_Free;
 
@@ -237,7 +238,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
 }
 
 Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
-  LiveInterval *VRegInterval = nullptr;
+  const LiveInterval *VRegInterval = nullptr;
   for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
     if ((VRegInterval = Matrix[*Unit].getOneVReg()))
       return VRegInterval->reg();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
index 8df84ebf4f06..8fc5a929d77b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LiveStacks.cpp
@@ -13,12 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveStacks.h"
-#include "llvm/CodeGen/LiveIntervals.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "livestacks"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 37fd3e4853ac..5f54d7cc8472 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -23,7 +23,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -118,7 +117,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
   // If the target doesn't want/need this pass, or if there are no locals
   // to consider, early exit.
   if (LocalObjectCount == 0 || !TRI->requiresVirtualBaseRegisters(MF))
-    return true;
+    return false;
 
   // Make sure we have enough space to store the local offsets.
   LocalOffsets.resize(MFI.getObjectIndexEnd());
@@ -344,7 +343,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
 
   MachineBasicBlock *Entry = &Fn.front();
 
-  unsigned BaseReg = 0;
+  Register BaseReg;
   int64_t BaseOffset = 0;
 
   // Loop through the frame references and allocate for them as necessary.
@@ -414,20 +413,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       continue;
     }
 
-    const MachineFunction *MF = MI.getMF();
-    const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
-    BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
-    LLVM_DEBUG(dbgs() << "  Materializing base register"
-                      << " at frame local offset "
-                      << LocalOffset + InstrOffset);
-
     // Tell the target to insert the instruction to initialize
     // the base register.
     //            MachineBasicBlock::iterator InsertionPt = Entry->begin();
     BaseReg = TRI->materializeFrameBaseRegister(Entry, FrameIdx, InstrOffset);
 
-    LLVM_DEBUG(dbgs() << " into " << printReg(BaseReg, TRI) << '\n');
+    LLVM_DEBUG(dbgs() << "  Materialized base register at frame local offset "
+                      << LocalOffset + InstrOffset
+                      << " into " << printReg(BaseReg, TRI) << '\n');
 
     // The base register already includes any offset specified
     // by the instruction, so account for that so it doesn't get
@@ -437,7 +430,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
       ++NumBaseRegisters;
       UsedBaseReg = true;
     }
-    assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+    assert(BaseReg && "Unable to allocate virtual base register!");
 
     // Modify the instruction to use the new base register rather
     // than the frame index operand.
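Two hunks above flip a `return true` to `return false` on an early exit. As a reminder of the contract they restore, here is a minimal sketch (my illustration; "NoChangesPass", shouldRun, and doWork are hypothetical names): a MachineFunctionPass's runOnMachineFunction must return true only when it actually modified the function, and an early exit that changes nothing must report false.

#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
struct NoChangesPass : public llvm::MachineFunctionPass {
  static char ID;
  NoChangesPass() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(llvm::MachineFunction &MF) override {
    if (!shouldRun(MF))
      return false; // Early exit: nothing was modified, so report "no change".
    bool Changed = doWork(MF);
    return Changed; // Only claim a change when one was made.
  }

  // Hypothetical helpers standing in for the real pass logic.
  bool shouldRun(llvm::MachineFunction &MF) { return false; }
  bool doWork(llvm::MachineFunction &MF) { return false; }
};
char NoChangesPass::ID = 0;
} // end anonymous namespace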
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
index dce64ab9f5ca..b47c96e50831 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowLevelType.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
index a06d1d6255c7..984dc452fbfd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -17,7 +17,6 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 3ec8c627f131..eea24d8e9353 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -27,15 +27,12 @@
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <queue>
-
 using namespace llvm;
 
 #define DEBUG_TYPE "mir-canonicalizer"
@@ -106,10 +103,7 @@ rescheduleLexographically(std::vector<MachineInstr *> instructions,
     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
   }
 
-  llvm::sort(StringInstrMap,
-             [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
-               return (a.first < b.first);
-             });
+  llvm::sort(StringInstrMap, llvm::less_first());
 
   for (auto &II : StringInstrMap) {
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index bf78594e9b23..3152102410d7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -15,12 +15,14 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
-#include <unordered_map>
 
 using namespace llvm;
 using namespace sampleprof;
@@ -68,6 +70,8 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
 bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
   if (!EnableFSDiscriminator)
     return false;
+  if (!MF.getFunction().isDebugInfoForProfiling())
+    return false;
 
   bool Changed = false;
   using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
@@ -131,6 +135,7 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
   if (Changed) {
     createFSDiscriminatorVariable(MF.getFunction().getParent());
     LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
+    (void) NumNewD;
   }
 
   return Changed;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
index 9f61dd9ef243..bc65700aba06 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -18,11 +18,7 @@
 #include "MIRVRegNamerUtils.h"
 #include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 0ca820f160aa..b0daa20913f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -15,7 +15,6 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
-#include <algorithm>
 #include <cassert>
 #include <cctype>
 #include <string>
@@ -250,7 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("dereferenceable", MIToken::kw_dereferenceable)
       .Case("invariant", MIToken::kw_invariant)
       .Case("align", MIToken::kw_align)
-      .Case("basealign", MIToken::kw_align)
+      .Case("basealign", MIToken::kw_basealign)
       .Case("addrspace", MIToken::kw_addrspace)
       .Case("stack", MIToken::kw_stack)
      .Case("got", MIToken::kw_got)
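The MILexer hunk above fixes a copy-paste slip: "basealign" was being lexed as kw_align, so the two MIR keywords were indistinguishable to the parser. A minimal sketch (mine, not from the patch) of the llvm::StringSwitch pattern involved; TokenKind and classify are stand-ins for the real MIToken machinery:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class TokenKind { Align, BaseAlign, Unknown };

static TokenKind classify(llvm::StringRef Word) {
  return llvm::StringSwitch<TokenKind>(Word)
      .Case("align", TokenKind::Align)
      .Case("basealign", TokenKind::BaseAlign) // the corrected mapping
      .Default(TokenKind::Unknown);
}

Because each .Case simply pairs a keyword with a result, a wrong right-hand side compiles cleanly and only shows up as misparsed MIR, which is why the bug survived until this change.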
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6477965bdc21..40ae7053ea09 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -26,8 +26,6 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MIRFormatter.h"
 #include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +36,8 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterBank.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -60,7 +60,6 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Casting.h"
@@ -69,10 +68,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include <algorithm>
 #include <cassert>
 #include <cctype>
 #include <cstddef>
@@ -744,7 +741,7 @@ bool MIParser::parseBasicBlockDefinition(
   MBB->setIsEHPad(IsLandingPad);
   MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
   MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
-  if (SectionID.hasValue()) {
+  if (SectionID) {
     MBB->setSectionID(SectionID.getValue());
     MF.setBBSectionsType(BasicBlockSection::List);
   }
@@ -1094,11 +1091,23 @@ bool MIParser::parse(MachineInstr *&MI) {
     return true;
   }
 
-  // TODO: Check for extraneous machine operands.
   MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
   MI->setFlags(Flags);
-  for (const auto &Operand : Operands)
+
+  unsigned NumExplicitOps = 0;
+  for (const auto &Operand : Operands) {
+    bool IsImplicitOp = Operand.Operand.isReg() && Operand.Operand.isImplicit();
+    if (!IsImplicitOp) {
+      if (!MCID.isVariadic() && NumExplicitOps >= MCID.getNumOperands() &&
+          !Operand.Operand.isValidExcessOperand())
+        return error(Operand.Begin, "too many operands for instruction");
+
+      ++NumExplicitOps;
+    }
+
     MI->addOperand(MF, Operand.Operand);
+  }
+
   if (assignRegisterTies(*MI, Operands))
     return true;
   if (PreInstrSymbol)
@@ -1609,7 +1618,7 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
       continue;
     // The parser ensures that this operand is a register use, so we just have
     // to check the tied-def operand.
-    unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+    unsigned DefIdx = *Operands[I].TiedDefIdx;
     if (DefIdx >= E)
       return error(Operands[I].Begin,
                    Twine("use of invalid tied-def operand index '" +
@@ -1714,6 +1723,15 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
         RegInfo->Kind == VRegInfo::REGBANK)
       return error("generic virtual registers must have a type");
   }
+
+  if (Flags & RegState::Define) {
+    if (Flags & RegState::Kill)
+      return error("cannot have a killed def operand");
+  } else {
+    if (Flags & RegState::Dead)
+      return error("cannot have a dead use operand");
+  }
+
   Dest = MachineOperand::CreateReg(
       Reg, Flags & RegState::Define, Flags & RegState::Implicit,
       Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
@@ -2689,19 +2707,19 @@ bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
     return true;
 
   uint32_t *Mask = MF.allocateRegMask();
-  while (true) {
-    if (Token.isNot(MIToken::NamedRegister))
-      return error("expected a named register");
-    Register Reg;
-    if (parseNamedRegister(Reg))
-      return true;
-    lex();
-    Mask[Reg / 32] |= 1U << (Reg % 32);
+  do {
+    if (Token.isNot(MIToken::rparen)) {
+      if (Token.isNot(MIToken::NamedRegister))
+        return error("expected a named register");
+      Register Reg;
+      if (parseNamedRegister(Reg))
+        return true;
+      lex();
+      Mask[Reg / 32] |= 1U << (Reg % 32);
+    }
+
     // TODO: Report an error if the same register is used more than once.
-    if (Token.isNot(MIToken::comma))
-      break;
-    lex();
-  }
+  } while (consumeIfPresent(MIToken::comma));
 
   if (expectAndConsume(MIToken::rparen))
     return true;
@@ -3269,11 +3287,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
   MDNode *Range = nullptr;
   while (consumeIfPresent(MIToken::comma)) {
     switch (Token.kind()) {
-    case MIToken::kw_align:
+    case MIToken::kw_align: {
       // align is printed if it is different than size.
-      if (parseAlignment(BaseAlignment))
+      uint64_t Alignment;
+      if (parseAlignment(Alignment))
         return true;
+      if (Ptr.Offset & (Alignment - 1)) {
+        // MachineMemOperand::getAlign never returns a value greater than the
+        // alignment of offset, so this just guards against hand-written MIR
+        // that specifies a large "align" value when it should probably use
+        // "basealign" instead.
+        return error("specified alignment is more aligned than offset");
+      }
+      BaseAlignment = Alignment;
       break;
+    }
     case MIToken::kw_basealign:
       // basealign is printed if it is different than align.
       if (parseAlignment(BaseAlignment))
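The new "align" guard above relies on the standard power-of-two alignment test. A small standalone illustration (mine, not code from the patch): Offset & (Alignment - 1) is nonzero exactly when Offset is not a multiple of Alignment, provided Alignment is a power of two.

#include <cassert>
#include <cstdint>

static bool isOffsetAligned(int64_t Offset, uint64_t Alignment) {
  assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
         "Alignment must be a nonzero power of two");
  return (Offset & (Alignment - 1)) == 0;
}

// isOffsetAligned(24, 8) -> true; isOffsetAligned(20, 8) -> false, which is
// the case the parser now rejects with "specified alignment is more aligned
// than offset".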
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index f144639770bc..4944cb46c5b5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -13,13 +13,10 @@
 #include "llvm/CodeGen/MIRParser/MIRParser.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
 #include "llvm/CodeGen/MIRParser/MIParser.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -29,7 +26,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
@@ -46,6 +43,8 @@
 using namespace llvm;
 
 namespace llvm {
+class MDNode;
+class RegisterBank;
 
 /// This class implements the parsing of LLVM IR that's embedded inside a MIR
 /// file.
@@ -459,6 +458,12 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
   MF.setHasWinCFI(YamlMF.HasWinCFI);
 
+  MF.setCallsEHReturn(YamlMF.CallsEHReturn);
+  MF.setCallsUnwindInit(YamlMF.CallsUnwindInit);
+  MF.setHasEHCatchret(YamlMF.HasEHCatchret);
+  MF.setHasEHScopes(YamlMF.HasEHScopes);
+  MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+
   if (YamlMF.Legalized)
     MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
   if (YamlMF.RegBankSelected)
@@ -638,7 +643,7 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
   // be saved for the caller).
   if (YamlMF.CalleeSavedRegisters) {
     SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
-    for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+    for (const auto &RegSource : *YamlMF.CalleeSavedRegisters) {
       Register Reg;
       if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
         return error(Error, RegSource.SourceRange);
@@ -809,7 +814,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
                                   Object.CalleeSavedRestored, ObjectIdx))
       return true;
     if (Object.LocalOffset)
-      MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue());
+      MFI.mapLocalFrameObject(ObjectIdx, *Object.LocalOffset);
     if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx))
       return true;
   }
@@ -826,6 +831,15 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
       return error(Error, YamlMFI.StackProtector.SourceRange);
     MFI.setStackProtectorIndex(FI);
   }
+
+  if (!YamlMFI.FunctionContext.Value.empty()) {
+    SMDiagnostic Error;
+    int FI;
+    if (parseStackObjectReference(PFS, FI, YamlMFI.FunctionContext.Value,
+                                  Error))
+      return error(Error, YamlMFI.FunctionContext.SourceRange);
+    MFI.setFunctionContextIndex(FI);
+  }
+
   return false;
 }
 
@@ -909,7 +923,7 @@ bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS,
       return error(Error, YamlConstant.Value.SourceRange);
     const Align PrefTypeAlign =
        M.getDataLayout().getPrefTypeAlign(Value->getType());
-    const Align Alignment = YamlConstant.Alignment.getValueOr(PrefTypeAlign);
+    const Align Alignment = YamlConstant.Alignment.value_or(PrefTypeAlign);
     unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment);
     if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index))
              .second)
@@ -1023,7 +1037,7 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
 MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
     : Impl(std::move(Impl)) {}
 
-MIRParser::~MIRParser() {}
+MIRParser::~MIRParser() = default;
 
 std::unique_ptr<Module>
 MIRParser::parseIRModule(DataLayoutCallbackTy DataLayoutCallback) {
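Several hunks in this file follow one API migration: llvm::Optional's hasValue()/getValue()/getValueOr() give way to the std::optional-style operator bool, operator*, and value_or(). A quick sketch of the before/after shape (mine, not from the diff; pickAlignment is a made-up helper):

#include "llvm/ADT/Optional.h"

static unsigned pickAlignment(llvm::Optional<unsigned> Requested) {
  if (Requested)            // was: Requested.hasValue()
    return *Requested;      // was: Requested.getValue()
  return 8;
}

static unsigned pickAlignmentOr(llvm::Optional<unsigned> Requested) {
  return Requested.value_or(8); // was: Requested.getValueOr(8)
}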
-#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" #include "llvm/MC/LaneBitmask.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -63,7 +50,6 @@ #include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> @@ -209,6 +195,12 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); + YamlMF.CallsEHReturn = MF.callsEHReturn(); + YamlMF.CallsUnwindInit = MF.callsUnwindInit(); + YamlMF.HasEHCatchret = MF.hasEHCatchret(); + YamlMF.HasEHScopes = MF.hasEHScopes(); + YamlMF.HasEHFunclets = MF.hasEHFunclets(); + YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); YamlMF.RegBankSelected = MF.getProperties().hasProperty( @@ -489,6 +481,12 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, .printStackObjectReference(MFI.getStackProtectorIndex()); } + if (MFI.hasFunctionContextIndex()) { + raw_string_ostream StrOS(YMF.FrameInfo.FunctionContext.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printStackObjectReference(MFI.getFunctionContextIndex()); + } + // Print the debug variable information. for (const MachineFunction::VariableDbgInfo &DebugVar : MF.getVariableDbgInfo()) { @@ -693,11 +691,11 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { // Print the live in registers. 
   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-  if (MRI.tracksLiveness() && !MBB.livein_empty()) {
+  if (!MBB.livein_empty()) {
     const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
     OS.indent(2) << "liveins: ";
 
     bool First = true;
-    for (const auto &LI : MBB.liveins()) {
+    for (const auto &LI : MBB.liveins_dbg()) {
       if (!First)
         OS << ", ";
       First = false;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
index b742ad9823c9..a8996a586909 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -15,7 +15,15 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 5862504109f0..a2abe71a6bd7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -10,7 +10,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineStableHash.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/Support/Debug.h"
 
 using namespace llvm;
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 33782c755eb0..7daf9025d303 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -10,17 +10,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AllocationOrder.h"
 #include "RegAllocEvictionAdvisor.h"
 #include "RegAllocGreedy.h"
-#include "RegAllocScore.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TF_API)
 #include "llvm/Analysis/ModelUnderTrainingRunner.h"
 #include "llvm/Analysis/NoInferenceModelRunner.h"
+#endif
 #include "llvm/Analysis/ReleaseModeModelRunner.h"
-#include "llvm/Analysis/Utils/TFUtils.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -28,13 +30,11 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
-#include "llvm/Config/config.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/PassRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
 #include <array>
 #include <memory>
 
@@ -46,10 +46,16 @@ using namespace llvm;
 // Generated header in release (AOT) mode
 #if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 #include "RegallocEvictModel.h"
+using CompiledModelType = RegallocEvictModel;
+#else
+using CompiledModelType = NoopSavedModelImpl;
 #endif
 
 // Options that only make sense in development mode
 #ifdef LLVM_HAVE_TF_API
+#include "RegAllocScore.h"
+#include "llvm/Analysis/Utils/TFUtils.h"
+
 static cl::opt<std::string> TrainingLog(
     "regalloc-training-log", cl::Hidden,
     cl::desc("Training log for the register allocator eviction model"));
@@ -60,6 +66,8 @@ static cl::opt<std::string> ModelUnderTraining(
 
 #endif // #ifdef LLVM_HAVE_TF_API
 
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
 /// The score injection pass.
 /// This pass calculates the score for a function and inserts it in the log,
 /// but this happens only in development mode. It's a no-op otherwise.
@@ -240,8 +248,8 @@ using FeaturesListNormalizer = std::array<float, FeatureIDs::FeatureCount>;
 /// The ML evictor (commonalities between release and development mode)
 class MLEvictAdvisor : public RegAllocEvictionAdvisor {
 public:
-  MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA, MLModelRunner *Runner,
-                 const MachineBlockFrequencyInfo &MBFI,
+  MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
+                 MLModelRunner *Runner, const MachineBlockFrequencyInfo &MBFI,
                  const MachineLoopInfo &Loops);
 
 protected:
@@ -257,14 +265,16 @@ protected:
   /// if we're just capturing the log of the default advisor, it needs to call
   /// the latter instead, so we need to pass all the necessary parameters for
   /// it. In the development case, it will also log.
-  virtual int64_t tryFindEvictionCandidatePosition(
-      LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
-      uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const;
+  virtual int64_t
+  tryFindEvictionCandidatePosition(const LiveInterval &VirtReg,
+                                   const AllocationOrder &Order,
+                                   unsigned OrderLimit, uint8_t CostPerUseLimit,
+                                   const SmallVirtRegSet &FixedRegisters) const;
 
   /// Load the features of the given VirtReg (allocated or not) at column Pos,
   /// but if that can't be evicted, return false instead.
   bool
-  loadInterferenceFeatures(LiveInterval &VirtReg, MCRegister PhysReg,
+  loadInterferenceFeatures(const LiveInterval &VirtReg, MCRegister PhysReg,
                            bool IsHint, const SmallVirtRegSet &FixedRegisters,
                            std::array<float, FeatureIDs::FeatureCount> &Largest,
                           size_t Pos) const;
 
 private:
   static float getInitialQueueSize(const MachineFunction &MF);
 
   MCRegister tryFindEvictionCandidate(
-      LiveInterval &VirtReg, const AllocationOrder &Order,
+      const LiveInterval &VirtReg, const AllocationOrder &Order,
       uint8_t CostPerUseLimit,
       const SmallVirtRegSet &FixedRegisters) const override;
 
-  void extractFeatures(const SmallVectorImpl<LiveInterval *> &Intervals,
+  void extractFeatures(const SmallVectorImpl<const LiveInterval *> &Intervals,
                        std::array<float, FeatureIDs::FeatureCount> &Largest,
                        size_t Pos, int64_t IsHint, int64_t LocalIntfsCount,
                        float NrUrgent) const;
 
   // Point-in-time: we didn't learn this, so we always delegate to the default.
   bool canEvictHintInterference(
-      LiveInterval &VirtReg, MCRegister PhysReg,
+      const LiveInterval &VirtReg, MCRegister PhysReg,
       const SmallVirtRegSet &FixedRegisters) const override {
     return getDefaultAdvisor().canEvictHintInterference(VirtReg, PhysReg,
                                                         FixedRegisters);
   }
 
-  const LIFeatureComponents
+  const LIFeatureComponents &
   getLIFeatureComponents(const LiveInterval &LI) const;
 
   // Hold on to a default advisor for:
@@ -306,17 +316,21 @@ private:
   // This could be static and shared, but its initialization is non-trivial.
   std::bitset<FeatureIDs::FeatureCount> DoNotNormalize;
   const float InitialQSize;
+
+  using RegID = unsigned;
+  mutable DenseMap<RegID, LIFeatureComponents> CachedFeatures;
 };
 
+#define _DECL_FEATURES(type, name, shape, _)                                   \
+  TensorSpec::createSpec<type>(#name, shape),
+
+static const std::vector<TensorSpec> InputFeatures{
+    {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
+};
+#undef _DECL_FEATURES
 // ===================================
 // Release (AOT) - specifics
 // ===================================
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
-const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{
-#define _GETNAME(_, NAME, __, ___) #NAME,
-    RA_EVICT_FEATURES_LIST(_GETNAME)
-#undef _GETNAME
-};
 class ReleaseModeEvictionAdvisorAnalysis final
     : public RegAllocEvictionAdvisorAnalysis {
 public:
@@ -335,17 +349,16 @@ private:
   }
 
   std::unique_ptr<RegAllocEvictionAdvisor>
-  getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
     if (!Runner)
-      Runner = std::make_unique<ReleaseModeModelRunner<RegallocEvictModel>>(
-          MF.getFunction().getContext(), FeatureNames, DecisionName);
+      Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+          MF.getFunction().getContext(), InputFeatures, DecisionName);
     return std::make_unique<MLEvictAdvisor>(
         MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
         getAnalysis<MachineLoopInfo>());
   }
-  std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner;
+  std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
 };
-#endif
 
 // ===================================
 // Development mode-specifics
@@ -353,13 +366,6 @@ private:
 //
 // Features we log
 #ifdef LLVM_HAVE_TF_API
-#define _DECL_FEATURES(type, name, shape, _)                                   \
-  TensorSpec::createSpec<type>(#name, shape),
-
-static const std::vector<TensorSpec> InputFeatures{
-    {RA_EVICT_FEATURES_LIST(_DECL_FEATURES)},
-};
-#undef _DECL_FEATURES
 static const TensorSpec Output =
     TensorSpec::createSpec<int64_t>(DecisionName, {1});
 static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
@@ -380,7 +386,7 @@ static const std::vector<TensorSpec> TrainingInputFeatures{
 
 class DevelopmentModeEvictAdvisor : public MLEvictAdvisor {
 public:
-  DevelopmentModeEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+  DevelopmentModeEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                               MLModelRunner *Runner,
                               const MachineBlockFrequencyInfo &MBFI,
                               const MachineLoopInfo &Loops, Logger *Log)
@@ -388,8 +394,8 @@ public:
 
 private:
   int64_t tryFindEvictionCandidatePosition(
-      LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
-      uint8_t CostPerUseLimit,
+      const LiveInterval &VirtReg, const AllocationOrder &Order,
+      unsigned OrderLimit, uint8_t CostPerUseLimit,
      const SmallVirtRegSet &FixedRegisters) const override;
 
   Logger *const Log;
@@ -436,7 +442,7 @@ private:
   }
 
   std::unique_ptr<RegAllocEvictionAdvisor>
-  getAdvisor(MachineFunction &MF, const RAGreedy &RA) override {
+  getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
     LLVMContext &Ctx = MF.getFunction().getContext();
     if (ModelUnderTraining.empty() && TrainingLog.empty()) {
       Ctx.emitError("Regalloc development mode should be requested with at "
@@ -496,7 +502,7 @@ float MLEvictAdvisor::getInitialQueueSize(const MachineFunction &MF) {
   return Ret;
 }
 
-MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
+MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
                                MLModelRunner *Runner,
                                const MachineBlockFrequencyInfo &MBFI,
                                const MachineLoopInfo &Loops)
@@ -514,7 +520,7 @@ MLEvictAdvisor::MLEvictAdvisor(MachineFunction &MF, const RAGreedy &RA,
 }
 
 int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
-    LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
+    const LiveInterval &, const AllocationOrder &, unsigned, uint8_t,
     const SmallVirtRegSet &) const {
   int64_t Ret = Runner->evaluate<int64_t>();
   assert(Ret >= 0);
@@ -523,7 +529,7 @@ int64_t MLEvictAdvisor::tryFindEvictionCandidatePosition(
 }
 
 bool MLEvictAdvisor::loadInterferenceFeatures(
-    LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
+    const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
     const SmallVirtRegSet &FixedRegisters, FeaturesListNormalizer &Largest,
     size_t Pos) const {
   // It is only possible to evict virtual register interference.
@@ -539,16 +545,18 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
 
   // The cascade tracking is the same as in the default advisor
   unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
 
-  SmallVector<LiveInterval *, MaxInterferences> InterferingIntervals;
+  SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
     // Different from the default heuristic, we don't make any assumptions
     // about what having more than 10 results in the query may mean.
-    const auto &IFIntervals = Q.interferingVRegs();
+    const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
     if (IFIntervals.empty() && InterferingIntervals.empty())
       continue;
+    if (IFIntervals.size() >= EvictInterferenceCutoff)
+      return false;
     InterferingIntervals.append(IFIntervals.begin(), IFIntervals.end());
-    for (LiveInterval *Intf : reverse(IFIntervals)) {
+    for (const LiveInterval *Intf : reverse(IFIntervals)) {
       assert(Register::isVirtualRegister(Intf->reg()) &&
              "Only expecting virtual register interference from query");
       // This is the same set of legality checks as in the default case: don't
@@ -587,7 +595,7 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
 }
 
 MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
-    LiveInterval &VirtReg, const AllocationOrder &Order,
+    const LiveInterval &VirtReg, const AllocationOrder &Order,
     uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
   auto MaybeOrderLimit = getOrderLimit(VirtReg, Order, CostPerUseLimit);
   if (!MaybeOrderLimit)
@@ -652,7 +660,7 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
   // decision making process.
   Regs[CandidateVirtRegPos].second = !MustFindEviction;
   if (!MustFindEviction)
-    extractFeatures(SmallVector<LiveInterval *, 1>(1, &VirtReg), Largest,
+    extractFeatures(SmallVector<const LiveInterval *, 1>(1, &VirtReg), Largest,
                     CandidateVirtRegPos, /*IsHint*/ 0, /*LocalIntfsCount*/ 0,
                     /*NrUrgent*/ 0.0);
   assert(InitialQSize > 0.0 && "We couldn't have gotten here if we had "
@@ -686,9 +694,15 @@ MCRegister MLEvictAdvisor::tryFindEvictionCandidate(
   return Regs[CandidatePos].first;
 }
 
-const LIFeatureComponents
+const LIFeatureComponents &
 MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
-  LIFeatureComponents Ret;
+  RegID ID = LI.reg().id();
+  LIFeatureComponents Empty;
+  auto I = CachedFeatures.insert(std::make_pair(ID, Empty));
+  LIFeatureComponents &Ret = I.first->getSecond();
+  if (!I.second)
+    return Ret;
+
   SmallPtrSet<MachineInstr *, 8> Visited;
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
 
@@ -733,7 +747,7 @@ MLEvictAdvisor::getLIFeatureComponents(const LiveInterval &LI) const {
 // Overall, this currently mimics what we do for weight calculation, but
 // instead of accumulating the various features, we keep them separate.
 void MLEvictAdvisor::extractFeatures(
-    const SmallVectorImpl<LiveInterval *> &Intervals,
+    const SmallVectorImpl<const LiveInterval *> &Intervals,
     std::array<float, FeatureIDs::FeatureCount> &Largest, size_t Pos,
     int64_t IsHint, int64_t LocalIntfsCount, float NrUrgent) const {
   int64_t NrDefsAndUses = 0;
@@ -769,7 +783,7 @@ void MLEvictAdvisor::extractFeatures(
     if (LI.endIndex() > EndSI)
       EndSI = LI.endIndex();
 
-    const LIFeatureComponents LIFC = getLIFeatureComponents(LI);
+    const LIFeatureComponents &LIFC = getLIFeatureComponents(LI);
     NrBrokenHints += VRM->hasPreferredPhys(LI.reg());
 
     NrDefsAndUses += LIFC.NrDefsAndUses;
@@ -831,8 +845,9 @@ RegAllocEvictionAdvisorAnalysis *llvm::createDevelopmentModeAdvisor() {
 }
 
 int64_t DevelopmentModeEvictAdvisor::tryFindEvictionCandidatePosition(
-    LiveInterval &VirtReg, const AllocationOrder &Order, unsigned OrderLimit,
-    uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
+    const LiveInterval &VirtReg, const AllocationOrder &Order,
+    unsigned OrderLimit, uint8_t CostPerUseLimit,
+    const SmallVirtRegSet &FixedRegisters) const {
   int64_t Ret = 0;
   if (isa<ModelUnderTrainingRunner>(getRunner())) {
     Ret = MLEvictAdvisor::tryFindEvictionCandidatePosition(
@@ -885,11 +900,9 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
 }
 #endif // #ifdef LLVM_HAVE_TF_API
 
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
   return new ReleaseModeEvictionAdvisorAnalysis();
 }
-#endif
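The getLIFeatureComponents change above switches from recomputing features on every call to caching them per register. A compact illustration (mine, not code from the diff) of the DenseMap idiom it uses: insert() returns {iterator, inserted}, so one lookup both finds an existing entry and claims a slot for a new one. FeatureCache and Features are generic stand-ins.

#include "llvm/ADT/DenseMap.h"

struct Features { int NrDefsAndUses = 0; /* ... */ };

class FeatureCache {
  mutable llvm::DenseMap<unsigned, Features> Cache;

  Features computeFeatures(unsigned ID) const {
    Features F;
    F.NrDefsAndUses = static_cast<int>(ID); // stand-in for the real work
    return F;
  }

public:
  const Features &get(unsigned ID) const {
    auto I = Cache.insert({ID, Features()});
    Features &Slot = I.first->second;
    if (I.second)  // freshly inserted: fill the slot exactly once
      Slot = computeFeatures(ID);
    return Slot;   // cached reference; no copy per query
  }
};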
 // In all cases except development mode, we don't need scoring.
 #if !defined(LLVM_HAVE_TF_API)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 8c9d00d08c6a..c186d0ba9969 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -11,8 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -26,12 +26,10 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -53,8 +51,7 @@ MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
     IrrLoopHeaderWeight = B->getIrrLoopHeaderWeight();
 }
 
-MachineBasicBlock::~MachineBasicBlock() {
-}
+MachineBasicBlock::~MachineBasicBlock() = default;
 
 /// Return the MCSymbol for this basic block.
 MCSymbol *MachineBasicBlock::getSymbol() const {
@@ -135,7 +132,7 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
   // Make sure the instructions have their operands in the reginfo lists.
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   for (MachineInstr &MI : N->instrs())
-    MI.AddRegOperandsToUseLists(RegInfo);
+    MI.addRegOperandsToUseLists(RegInfo);
 }
 
 void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -153,7 +150,7 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   // Add the instruction's register operands to their corresponding
   // use/def lists.
   MachineFunction *MF = Parent->getParent();
-  N->AddRegOperandsToUseLists(MF->getRegInfo());
+  N->addRegOperandsToUseLists(MF->getRegInfo());
   MF->handleInsertion(*N);
 }
 
@@ -165,7 +162,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   // Remove from the use/def lists.
   if (MachineFunction *MF = N->getMF()) {
     MF->handleRemoval(*N);
-    N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
+    N->removeRegOperandsFromUseLists(MF->getRegInfo());
   }
 
   N->setParent(nullptr);
@@ -918,6 +915,10 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
   return std::next(I) == MachineFunction::const_iterator(MBB);
 }
 
+const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const {
+  return Successors.size() == 1 ? Successors[0] : nullptr;
+}
+
 MachineBasicBlock *MachineBasicBlock::getFallThrough() {
   MachineFunction::iterator Fallthrough = getIterator();
   ++Fallthrough;
@@ -1620,6 +1621,16 @@ MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
   return liveout_iterator(*this, ExceptionPointer, ExceptionSelector, false);
 }
 
+bool MachineBasicBlock::sizeWithoutDebugLargerThan(unsigned Limit) const {
+  unsigned Cntr = 0;
+  auto R = instructionsWithoutDebug(begin(), end());
+  for (auto I = R.begin(), E = R.end(); I != E; ++I) {
+    if (++Cntr > Limit)
+      return true;
+  }
+  return false;
+}
+
 const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
 const MBBSectionID
     MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
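The new sizeWithoutDebugLargerThan() above uses an early-exit counting idiom: rather than computing the full instruction count and comparing, it stops walking as soon as the limit is crossed, which bounds the cost on very large blocks. A generic sketch (my illustration, not code from this commit):

template <typename Range>
static bool sizeLargerThan(const Range &R, unsigned Limit) {
  unsigned Count = 0;
  for (auto It = R.begin(), End = R.end(); It != End; ++It) {
    if (++Count > Limit)
      return true; // early exit: the rest of the range is never visited
  }
  return false;
}

// e.g. sizeLargerThan(SomeContainer, 100) answers "more than 100 elements?"
// in at most 101 steps, regardless of the container's actual size.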
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c93ffaabf74c..4cc84f22bdde 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -34,13 +34,13 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/TailDuplicator.h"
@@ -50,6 +50,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/PrintPasses.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
@@ -200,10 +201,8 @@ static cl::opt<unsigned> TriangleChainCount(
     cl::init(2),
     cl::Hidden);
 
-static cl::opt<bool> EnableExtTspBlockPlacement(
-    "enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
-    cl::desc("Enable machine block placement based on the ext-tsp model, "
-             "optimizing I-cache utilization."));
+extern cl::opt<bool> EnableExtTspBlockPlacement;
+extern cl::opt<bool> ApplyExtTspWithoutProfile;
 
 namespace llvm {
 extern cl::opt<unsigned> StaticLikelyProb;
@@ -3422,7 +3421,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // Apply a post-processing optimizing block placement.
-  if (MF.size() >= 3 && EnableExtTspBlockPlacement) {
+  if (MF.size() >= 3 && EnableExtTspBlockPlacement &&
+      (ApplyExtTspWithoutProfile || MF.getFunction().hasProfileData())) {
     // Find a new placement and modify the layout of the blocks in the
     // function.
     applyExtTsp();
@@ -3660,6 +3660,9 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
   if (std::next(F.begin()) == F.end())
     return false;
 
+  if (!isFunctionInPrintList(F.getName()))
+    return false;
+
   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c9f762f9a6e7..a84377d70855 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -12,10 +12,8 @@
 
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
index 0fcb07252d0e..e60fd9f7883a 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCSE.cpp
@@ -34,7 +34,6 @@
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegister.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
@@ -91,6 +90,11 @@ namespace {
       AU.addPreserved<MachineBlockFrequencyInfo>();
     }
 
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties()
+          .set(MachineFunctionProperties::Property::IsSSA);
+    }
+
     void releaseMemory() override {
       ScopeMap.clear();
       PREMap.clear();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index bd7f0f862947..1e5b8dd0bbb0 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -11,13 +11,14 @@
 /// DILocalVariable which mir-debugify generated before.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils/Debugify.h"
+#include "llvm/Pass.h"
 
 #define DEBUG_TYPE "mir-check-debugify"
 
@@ -27,9 +28,6 @@ namespace {
 
 struct CheckDebugMachineModule : public ModulePass {
   bool runOnModule(Module &M) override {
-    MachineModuleInfo &MMI =
-        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
-
     NamedMDNode *NMD = M.getNamedMetadata("llvm.mir.debugify");
     if (!NMD) {
       errs() << "WARNING: Please run mir-debugify to generate "
@@ -37,6 +35,9 @@ struct CheckDebugMachineModule : public ModulePass {
       return false;
     }
 
+    MachineModuleInfo &MMI =
+        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
     auto getDebugifyOperand = [&](unsigned Idx) -> unsigned {
       return mdconst::extract<ConstantInt>(NMD->getOperand(Idx)->getOperand(0))
           ->getZExtValue();
@@ -106,8 +107,7 @@ struct CheckDebugMachineModule : public ModulePass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<MachineModuleInfoWrapperPass>();
-    AU.addPreserved<MachineModuleInfoWrapperPass>();
-    AU.setPreservesCFG();
+    AU.setPreservesAll();
   }
 
   static char ID; // Pass identification.
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
index 72ab9ee4f388..722a709af240 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -21,7 +21,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -278,6 +277,8 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
   case MachineCombinerPattern::REASSOC_XA_YB:
   case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
   case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
+  case MachineCombinerPattern::SUBADD_OP1:
+  case MachineCombinerPattern::SUBADD_OP2:
     return CombinerObjective::MustReduceDepth;
   case MachineCombinerPattern::REASSOC_XY_BCA:
   case MachineCombinerPattern::REASSOC_XY_BAC:
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 57fbe4112e47..66f0eb83e57c 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -83,8 +83,24 @@ STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
 DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
               "Controls which register COPYs are forwarded");
 
+static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
+                                     cl::Hidden);
+
 namespace {
 
+static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
+                                            const TargetInstrInfo &TII,
+                                            bool UseCopyInstr) {
+  if (UseCopyInstr)
+    return TII.isCopyInstr(MI);
+
+  if (MI.isCopy())
+    return Optional<DestSourcePair>(
+        DestSourcePair{MI.getOperand(0), MI.getOperand(1)});
+
+  return None;
+}
+
 class CopyTracker {
   struct CopyInfo {
     MachineInstr *MI;
@@ -110,7 +126,8 @@ public:
   }
 
   /// Remove register from copy maps.
-  void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+  void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+                          const TargetInstrInfo &TII, bool UseCopyInstr) {
     // Since Reg might be a subreg of some registers, invalidating Reg alone is
     // not enough. We have to find the COPY that defines Reg, or the registers
     // defined by Reg, and invalidate all of them.
@@ -120,8 +137,13 @@
       auto I = Copies.find(*RUI);
       if (I != Copies.end()) {
         if (MachineInstr *MI = I->second.MI) {
-          RegsToInvalidate.insert(MI->getOperand(0).getReg().asMCReg());
-          RegsToInvalidate.insert(MI->getOperand(1).getReg().asMCReg());
+          Optional<DestSourcePair> CopyOperands =
+              isCopyInstr(*MI, TII, UseCopyInstr);
+          assert(CopyOperands && "Expect copy");
+
+          RegsToInvalidate.insert(
+              CopyOperands->Destination->getReg().asMCReg());
+          RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg());
         }
         RegsToInvalidate.insert(I->second.DefRegs.begin(),
                                 I->second.DefRegs.end());
@@ -133,7 +155,8 @@
   }
 
   /// Clobber a single register, removing it from the tracker's copy maps.
-  void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI) {
+  void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
+                       const TargetInstrInfo &TII, bool UseCopyInstr) {
     for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
       auto I = Copies.find(*RUI);
      if (I != Copies.end()) {
@@ -142,8 +165,12 @@
         markRegsUnavailable(I->second.DefRegs, TRI);
         // When we clobber the destination of a copy, we need to clobber the
         // whole register it defined.
-        if (MachineInstr *MI = I->second.MI)
-          markRegsUnavailable({MI->getOperand(0).getReg().asMCReg()}, TRI);
+        if (MachineInstr *MI = I->second.MI) {
+          Optional<DestSourcePair> CopyOperands =
+              isCopyInstr(*MI, TII, UseCopyInstr);
+          markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()},
+                              TRI);
+        }
         // Now we can erase the copy.
         Copies.erase(I);
       }
@@ -151,11 +178,13 @@
   }
 
   /// Add this copy's registers into the tracker's copy maps.
-  void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
-    assert(MI->isCopy() && "Tracking non-copy?");
+  void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI,
+                 const TargetInstrInfo &TII, bool UseCopyInstr) {
+    Optional<DestSourcePair> CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr);
+    assert(CopyOperands && "Tracking non-copy?");
 
-    MCRegister Def = MI->getOperand(0).getReg().asMCReg();
-    MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+    MCRegister Src = CopyOperands->Source->getReg().asMCReg();
+    MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
 
     // Remember Def is defined by the copy.
     for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -198,15 +227,22 @@ public:
   }
 
   MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
-                                      const TargetRegisterInfo &TRI) {
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII,
+                                      bool UseCopyInstr) {
     MCRegUnitIterator RUI(Reg, &TRI);
     MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
-    if (!AvailCopy ||
-        !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+
+    if (!AvailCopy)
+      return nullptr;
+
+    Optional<DestSourcePair> CopyOperands =
+        isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+    Register AvailSrc = CopyOperands->Source->getReg();
+    Register AvailDef = CopyOperands->Destination->getReg();
+    if (!TRI.isSubRegisterEq(AvailSrc, Reg))
       return nullptr;
 
-    Register AvailSrc = AvailCopy->getOperand(1).getReg();
-    Register AvailDef = AvailCopy->getOperand(0).getReg();
     for (const MachineInstr &MI :
          make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
       for (const MachineOperand &MO : MI.operands())
@@ -219,20 +255,26 @@ public:
   }
 
   MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg,
-                              const TargetRegisterInfo &TRI) {
+                              const TargetRegisterInfo &TRI,
+                              const TargetInstrInfo &TII, bool UseCopyInstr) {
     // We check the first RegUnit here, since we'll only be interested in the
     // copy if it copies the entire register anyway.
     MCRegUnitIterator RUI(Reg, &TRI);
     MachineInstr *AvailCopy =
         findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
-    if (!AvailCopy ||
-        !TRI.isSubRegisterEq(AvailCopy->getOperand(0).getReg(), Reg))
+
+    if (!AvailCopy)
+      return nullptr;
+
+    Optional<DestSourcePair> CopyOperands =
+        isCopyInstr(*AvailCopy, TII, UseCopyInstr);
+    Register AvailSrc = CopyOperands->Source->getReg();
+    Register AvailDef = CopyOperands->Destination->getReg();
+    if (!TRI.isSubRegisterEq(AvailDef, Reg))
      return nullptr;
 
     // Check that the available copy isn't clobbered by any regmasks between
     // itself and the destination.
-    Register AvailSrc = AvailCopy->getOperand(1).getReg();
-    Register AvailDef = AvailCopy->getOperand(0).getReg();
     for (const MachineInstr &MI :
          make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
       for (const MachineOperand &MO : MI.operands())
@@ -253,10 +295,14 @@ class MachineCopyPropagation : public MachineFunctionPass {
   const TargetInstrInfo *TII;
   const MachineRegisterInfo *MRI;
 
+  // Whether to recognize target-specific copy-like instructions via
+  // TargetInstrInfo::isCopyInstr() in addition to plain COPYs.
+ bool UseCopyInstr; + public: static char ID; // Pass identification, replacement for typeid - MachineCopyPropagation() : MachineFunctionPass(ID) { + MachineCopyPropagation(bool CopyInstr = false) + : MachineFunctionPass(ID), UseCopyInstr(CopyInstr || MCPUseCopyInstr) { initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); } @@ -334,9 +380,13 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader, /// isNopCopy("ecx = COPY eax", AX, CX) == true /// isNopCopy("ecx = COPY eax", AH, CL) == false static bool isNopCopy(const MachineInstr &PreviousCopy, MCRegister Src, - MCRegister Def, const TargetRegisterInfo *TRI) { - MCRegister PreviousSrc = PreviousCopy.getOperand(1).getReg().asMCReg(); - MCRegister PreviousDef = PreviousCopy.getOperand(0).getReg().asMCReg(); + MCRegister Def, const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, bool UseCopyInstr) { + + Optional<DestSourcePair> CopyOperands = + isCopyInstr(PreviousCopy, *TII, UseCopyInstr); + MCRegister PreviousSrc = CopyOperands->Source->getReg().asMCReg(); + MCRegister PreviousDef = CopyOperands->Destination->getReg().asMCReg(); if (Src == PreviousSrc && Def == PreviousDef) return true; if (!TRI->isSubRegister(PreviousSrc, Src)) @@ -356,22 +406,26 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, return false; // Search for an existing copy. - MachineInstr *PrevCopy = Tracker.findAvailCopy(Copy, Def, *TRI); + MachineInstr *PrevCopy = + Tracker.findAvailCopy(Copy, Def, *TRI, *TII, UseCopyInstr); if (!PrevCopy) return false; + auto PrevCopyOperands = isCopyInstr(*PrevCopy, *TII, UseCopyInstr); // Check that the existing copy uses the correct sub registers. - if (PrevCopy->getOperand(0).isDead()) + if (PrevCopyOperands->Destination->isDead()) return false; - if (!isNopCopy(*PrevCopy, Src, Def, TRI)) + if (!isNopCopy(*PrevCopy, Src, Def, TRI, TII, UseCopyInstr)) return false; LLVM_DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); // Copy was redundantly redefining either Src or Def. Remove earlier kill // flags between Copy and PrevCopy because the value will be reused now. - assert(Copy.isCopy()); - Register CopyDef = Copy.getOperand(0).getReg(); + Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + assert(CopyOperands); + + Register CopyDef = CopyOperands->Destination->getReg(); assert(CopyDef == Src || CopyDef == Def); for (MachineInstr &MI : make_range(PrevCopy->getIterator(), Copy.getIterator())) @@ -385,7 +439,9 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register Def = Copy.getOperand(0).getReg(); + + Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); if (const TargetRegisterClass *URC = UseI.getRegClassConstraint(UseIdx, TII, TRI)) @@ -403,7 +459,8 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { - Register CopySrcReg = Copy.getOperand(1).getReg(); + Optional<DestSourcePair> CopyOperands = isCopyInstr(Copy, *TII, UseCopyInstr); + Register CopySrcReg = CopyOperands->Source->getReg(); // If the new register meets the opcode register constraints, then allow // forwarding. 
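Every helper above now funnels through a single file-local wrapper whose closing fragment (`None; }`) is visible at the very top of this chunk. Reconstructed from its call sites, it looks roughly like the sketch below; the point is that a populated `DestSourcePair` proves that `MI` moves `Source` into `Destination`, whether or not the opcode is a plain `COPY`:

  static Optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
                                              const TargetInstrInfo &TII,
                                              bool UseCopyInstr) {
    // Defer to the target hook when requested; it also recognises
    // target-specific copy-like instructions, not just generic COPYs.
    if (UseCopyInstr)
      return TII.isCopyInstr(MI);

    // Otherwise accept only the generic COPY opcode, where operand 0 is the
    // destination and operand 1 the source.
    if (MI.isCopy())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};

    return None;
  }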
@@ -411,34 +468,10 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, UseI.getRegClassConstraint(UseIdx, TII, TRI)) return URC->contains(CopySrcReg); - if (!UseI.isCopy()) + auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr); + if (!UseICopyOperands) return false; - const TargetRegisterClass *CopySrcRC = - TRI->getMinimalPhysRegClass(CopySrcReg); - const TargetRegisterClass *UseDstRC = - TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); - const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC); - - // If cross copy register class is not the same as copy source register class - // then it is not possible to copy the register directly and requires a cross - // register class copy. Fowarding this copy without checking register class of - // UseDst may create additional cross register copies when expanding the copy - // instruction in later passes. - if (CopySrcRC != CrossCopyRC) { - const TargetRegisterClass *CopyDstRC = - TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg()); - - // Check if UseDstRC matches the necessary register class to copy from - // CopySrc's register class. If so then forwarding the copy will not - // introduce any cross-class copys. Else if CopyDstRC matches then keep the - // copy and do not forward. If neither UseDstRC or CopyDstRC matches then - // we may need a cross register copy later but we do not worry about it - // here. - if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC) - return false; - } - /// COPYs don't have register class constraints, so if the user instruction /// is a COPY, we just try to avoid introducing additional cross-class /// COPYs. For example: @@ -455,12 +488,34 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, /// /// so we have reduced the number of cross-class COPYs and potentially /// introduced a nop COPY that can be removed. - const TargetRegisterClass *SuperRC = UseDstRC; - for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); - SuperRC; SuperRC = *SuperRCI++) - if (SuperRC->contains(CopySrcReg)) - return true; + // Allow forwarding if src and dst belong to any common class, so long as they + // don't belong to any (possibly smaller) common class that requires copies to + // go via a different class. + Register UseDstReg = UseICopyOperands->Destination->getReg(); + bool Found = false; + bool IsCrossClass = false; + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) { + Found = true; + if (TRI->getCrossCopyRegClass(RC) != RC) { + IsCrossClass = true; + break; + } + } + } + if (!Found) + return false; + if (!IsCrossClass) + return true; + // The forwarded copy would be cross-class. Only do this if the original copy + // was also cross-class. 
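// (Forwarding then merely trades one cross-class copy for another, so the
//  total number of cross-class copies cannot grow.)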
+ Register CopyDstReg = CopyOperands->Destination->getReg(); + for (const TargetRegisterClass *RC : TRI->regclasses()) { + if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) && + TRI->getCrossCopyRegClass(RC) != RC) + return true; + } return false; } @@ -527,13 +582,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (!MOUse.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailCopy(MI, MOUse.getReg().asMCReg(), + *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register CopyDstReg = Copy->getOperand(0).getReg(); - const MachineOperand &CopySrc = Copy->getOperand(1); + Optional<DestSourcePair> CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register CopyDstReg = CopyOperands->Destination->getReg(); + const MachineOperand &CopySrc = *CopyOperands->Source; Register CopySrcReg = CopySrc.getReg(); // FIXME: Don't handle partial uses of wider COPYs yet. @@ -557,7 +614,8 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { // Check that the instruction is not a copy that partially overwrites the // original copy source that we are about to use. The tracker mechanism // cannot cope with that. - if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + if (isCopyInstr(MI, *TII, UseCopyInstr) && + MI.modifiesRegister(CopySrcReg, TRI) && !MI.definesRegister(CopySrcReg)) { LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); continue; @@ -596,76 +654,82 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { // Analyze copies (which don't overlap themselves). - if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { - assert(MI.getOperand(0).getReg().isPhysical() && - MI.getOperand(1).getReg().isPhysical() && - "MachineCopyPropagation should be run after register allocation!"); - - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); - - // The two copies cancel out and the source of the first copy - // hasn't been overridden, eliminate the second one. e.g. - // %ecx = COPY %eax - // ... nothing clobbered eax. - // %eax = COPY %ecx - // => - // %ecx = COPY %eax - // - // or - // - // %ecx = COPY %eax - // ... nothing clobbered eax. - // %ecx = COPY %eax - // => - // %ecx = COPY %eax - if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def)) - continue; + Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands) { + + Register RegSrc = CopyOperands->Source->getReg(); + Register RegDef = CopyOperands->Destination->getReg(); + + if (!TRI->regsOverlap(RegDef, RegSrc)) { + assert(RegDef.isPhysical() && RegSrc.isPhysical() && + "MachineCopyPropagation should be run after register allocation!"); + + MCRegister Def = RegDef.asMCReg(); + MCRegister Src = RegSrc.asMCReg(); + + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ecx = COPY %eax + // ... nothing clobbered eax. + // %eax = COPY %ecx + // => + // %ecx = COPY %eax + // + // or + // + // %ecx = COPY %eax + // ... nothing clobbered eax. 
+ // %ecx = COPY %eax + // => + // %ecx = COPY %eax + if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def)) + continue; - forwardUses(MI); + forwardUses(MI); + + // Src may have been changed by forwardUses() + CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + Src = CopyOperands->Source->getReg().asMCReg(); + + // If Src is defined by a previous copy, the previous copy cannot be + // eliminated. + ReadRegister(Src, MI, RegularUse); + for (const MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + MCRegister Reg = MO.getReg().asMCReg(); + if (!Reg) + continue; + ReadRegister(Reg, MI, RegularUse); + } - // Src may have been changed by forwardUses() - Src = MI.getOperand(1).getReg().asMCReg(); + LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump()); + + // Copy is now a candidate for deletion. + if (!MRI->isReserved(Def)) + MaybeDeadCopies.insert(&MI); + + // If 'Def' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9 = copy %xmm2 + // ... + // %xmm2 = copy %xmm0 + // ... + // %xmm2 = copy %xmm9 + Tracker.clobberRegister(Def, *TRI, *TII, UseCopyInstr); + for (const MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + MCRegister Reg = MO.getReg().asMCReg(); + if (!Reg) + continue; + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); + } - // If Src is defined by a previous copy, the previous copy cannot be - // eliminated. - ReadRegister(Src, MI, RegularUse); - for (const MachineOperand &MO : MI.implicit_operands()) { - if (!MO.isReg() || !MO.readsReg()) - continue; - MCRegister Reg = MO.getReg().asMCReg(); - if (!Reg) - continue; - ReadRegister(Reg, MI, RegularUse); - } + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); - LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump()); - - // Copy is now a candidate for deletion. - if (!MRI->isReserved(Def)) - MaybeDeadCopies.insert(&MI); - - // If 'Def' is previously source of another copy, then this earlier copy's - // source is no longer available. e.g. - // %xmm9 = copy %xmm2 - // ... - // %xmm2 = copy %xmm0 - // ... - // %xmm2 = copy %xmm9 - Tracker.clobberRegister(Def, *TRI); - for (const MachineOperand &MO : MI.implicit_operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - MCRegister Reg = MO.getReg().asMCReg(); - if (!Reg) - continue; - Tracker.clobberRegister(Reg, *TRI); + continue; } - - Tracker.trackCopy(&MI, *TRI); - - continue; } // Clobber any earlyclobber regs first. @@ -677,7 +741,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // later. if (MO.isTied()) ReadRegister(Reg, MI, RegularUse); - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } forwardUses(MI); @@ -713,7 +777,9 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { MaybeDeadCopies.begin(); DI != MaybeDeadCopies.end();) { MachineInstr *MaybeDead = *DI; - MCRegister Reg = MaybeDead->getOperand(0).getReg().asMCReg(); + Optional<DestSourcePair> CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + MCRegister Reg = CopyOperands->Destination->getReg().asMCReg(); assert(!MRI->isReserved(Reg)); if (!RegMask->clobbersPhysReg(Reg)) { @@ -726,7 +792,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Make sure we invalidate any entries in the copy maps before erasing // the instruction. 
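// (Otherwise the tracker could keep handing out a dangling MachineInstr*
//  for the about-to-be-erased copy.)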
- Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); // erase() will return the next valid iterator pointing to the next // element after the erased one. @@ -739,7 +805,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Any previous copy definition or reading the Defs is no longer available. for (MCRegister Reg : Defs) - Tracker.clobberRegister(Reg, *TRI); + Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr); } // If MBB doesn't have successors, delete the copies whose defs are not used. @@ -749,12 +815,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineInstr *MaybeDead : MaybeDeadCopies) { LLVM_DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: "; MaybeDead->dump()); - assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); + + Optional<DestSourcePair> CopyOperands = + isCopyInstr(*MaybeDead, *TII, UseCopyInstr); + assert(CopyOperands); + + Register SrcReg = CopyOperands->Source->getReg(); + Register DestReg = CopyOperands->Destination->getReg(); + assert(!MRI->isReserved(DestReg)); // Update matching debug values, if any. - assert(MaybeDead->isCopy()); - Register SrcReg = MaybeDead->getOperand(1).getReg(); - Register DestReg = MaybeDead->getOperand(0).getReg(); SmallVector<MachineInstr *> MaybeDeadDbgUsers( CopyDbgUsers[MaybeDead].begin(), CopyDbgUsers[MaybeDead].end()); MRI->updateDbgUsersToReg(DestReg.asMCReg(), SrcReg.asMCReg(), @@ -772,10 +842,14 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { } static bool isBackwardPropagatableCopy(MachineInstr &MI, - const MachineRegisterInfo &MRI) { - assert(MI.isCopy() && "MI is expected to be a COPY"); - Register Def = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); + const MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + bool UseCopyInstr) { + Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, TII, UseCopyInstr); + assert(CopyOperands && "MI is expected to be a COPY"); + + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (!Def || !Src) return false; @@ -783,7 +857,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI, if (MRI.isReserved(Def) || MRI.isReserved(Src)) return false; - return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); + return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill(); } void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { @@ -808,13 +882,15 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { if (!MODef.isRenamable()) continue; - MachineInstr *Copy = - Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI); + MachineInstr *Copy = Tracker.findAvailBackwardCopy( + MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr); if (!Copy) continue; - Register Def = Copy->getOperand(0).getReg(); - Register Src = Copy->getOperand(1).getReg(); + Optional<DestSourcePair> CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Def = CopyOperands->Destination->getReg(); + Register Src = CopyOperands->Source->getReg(); if (MODef.getReg() != Src) continue; @@ -833,7 +909,7 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { << MI << " from " << *Copy); MODef.setReg(Def); - MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + MODef.setIsRenamable(CopyOperands->Destination->isRenamable()); LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); 
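// propagateDefs is the mirror image of forwardUses: rather than replacing a
// use with an earlier copy's source, it renames an earlier def to a later
// copy's destination. Schematically (register names purely illustrative):
//   %ecx = OP ...               renamable def
//   ...                         nothing reads or clobbers %ecx or %eax
//   %eax = COPY killed %ecx     backward-propagatable copy
// =>
//   %eax = OP ...               the COPY is now dead; it is recorded in
//                               MaybeDeadCopies and erased after the scan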
MaybeDeadCopies.insert(Copy); @@ -849,20 +925,23 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { // Ignore non-trivial COPYs. - if (MI.isCopy() && MI.getNumOperands() == 2 && - !TRI->regsOverlap(MI.getOperand(0).getReg(), - MI.getOperand(1).getReg())) { - - MCRegister Def = MI.getOperand(0).getReg().asMCReg(); - MCRegister Src = MI.getOperand(1).getReg().asMCReg(); - - // Unlike forward cp, we don't invoke propagateDefs here, - // just let forward cp do COPY-to-COPY propagation. - if (isBackwardPropagatableCopy(MI, *MRI)) { - Tracker.invalidateRegister(Src, *TRI); - Tracker.invalidateRegister(Def, *TRI); - Tracker.trackCopy(&MI, *TRI); - continue; + Optional<DestSourcePair> CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); + if (CopyOperands && MI.getNumOperands() == 2) { + Register DefReg = CopyOperands->Destination->getReg(); + Register SrcReg = CopyOperands->Source->getReg(); + + if (!TRI->regsOverlap(DefReg, SrcReg)) { + MCRegister Def = DefReg.asMCReg(); + MCRegister Src = SrcReg.asMCReg(); + + // Unlike forward cp, we don't invoke propagateDefs here, + // just let forward cp do COPY-to-COPY propagation. + if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) { + Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr); + Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr); + Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr); + continue; + } } } @@ -872,7 +951,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( MCRegister Reg = MO.getReg().asMCReg(); if (!Reg) continue; - Tracker.invalidateRegister(Reg, *TRI); + Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr); } propagateDefs(MI); @@ -884,7 +963,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( continue; if (MO.isDef()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); if (MO.readsReg()) { if (MO.isDebug()) { @@ -898,7 +978,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( } } } else { - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII, + UseCopyInstr); } } } @@ -906,8 +987,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock( for (auto *Copy : MaybeDeadCopies) { - Register Src = Copy->getOperand(1).getReg(); - Register Def = Copy->getOperand(0).getReg(); + Optional<DestSourcePair> CopyOperands = + isCopyInstr(*Copy, *TII, UseCopyInstr); + Register Src = CopyOperands->Source->getReg(); + Register Def = CopyOperands->Destination->getReg(); SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), CopyDbgUsers[Copy].end()); @@ -938,3 +1021,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +MachineFunctionPass * +llvm::createMachineCopyPropagationPass(bool UseCopyInstr = false) { + return new MachineCopyPropagation(UseCopyInstr); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp index 42a5e2b7af01..6871ac35b300 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineCycleAnalysis.cpp @@ -8,50 +8,15 @@ #include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/ADT/GenericCycleImpl.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineSSAContext.h" -#include 
"llvm/InitializePasses.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" using namespace llvm; template class llvm::GenericCycleInfo<llvm::MachineSSAContext>; template class llvm::GenericCycle<llvm::MachineSSAContext>; -namespace { - -/// Legacy analysis pass which computes a \ref MachineCycleInfo. -class MachineCycleInfoWrapperPass : public MachineFunctionPass { - MachineFunction *F = nullptr; - MachineCycleInfo CI; - -public: - static char ID; - - MachineCycleInfoWrapperPass(); - - MachineCycleInfo &getCycleInfo() { return CI; } - const MachineCycleInfo &getCycleInfo() const { return CI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - -class MachineCycleInfoPrinterPass : public MachineFunctionPass { -public: - static char ID; - - MachineCycleInfoPrinterPass(); - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; -}; - -} // namespace - char MachineCycleInfoWrapperPass::ID = 0; MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass() @@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() { F = nullptr; } +class MachineCycleInfoPrinterPass : public MachineFunctionPass { +public: + static char ID; + + MachineCycleInfoPrinterPass(); + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + char MachineCycleInfoPrinterPass::ID = 0; MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass() @@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) { CI.print(errs()); return false; } + +bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) { + MachineFunction *MF = I.getParent()->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = MF->getSubtarget(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + const TargetInstrInfo *TII = ST.getInstrInfo(); + + // The instruction is cycle invariant if all of its operands are. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (Reg == 0) + continue; + + // An instruction that uses or defines a physical register can't e.g. be + // hoisted, so mark this as not invariant. + if (Register::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) && + !TII->isIgnorableUse(MO)) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead can't be moved. + return false; + } else if (any_of(Cycle->getEntries(), + [&](const MachineBasicBlock *Block) { + return Block->isLiveIn(Reg); + })) { + // If the reg is live into any header of the cycle we can't hoist an + // instruction which would clobber it. 
+ return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!"); + + // If the cycle contains the definition of an operand, then the instruction + // isn't cycle invariant. + if (Cycle->contains(MRI->getVRegDef(Reg)->getParent())) + return false; + } + + // If we got this far, the instruction is cycle invariant! + return true; +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp index 599a81847592..b726a032ca18 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDebugify.cpp @@ -16,14 +16,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" #include "llvm/Transforms/Utils/Debugify.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index a39dc79baaa8..346cfedde390 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -7,10 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominanceFrontier.h" -#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp index 28cff2a4f3f3..0632cde9c6f4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineDominators.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp index 02f58ca5eef0..f58996ea90c6 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunction.cpp @@ -44,7 +44,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -61,7 +60,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DOTGraphTraits.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" @@ -109,6 +107,27 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) { llvm_unreachable("Invalid machine function property"); } +void setUnsafeStackSize(const Function 
&F, MachineFrameInfo &FrameInfo) { + if (!F.hasFnAttribute(Attribute::SafeStack)) + return; + + auto *Existing = + dyn_cast_or_null<MDTuple>(F.getMetadata(LLVMContext::MD_annotation)); + + if (!Existing || Existing->getNumOperands() != 2) + return; + + auto *MetadataName = "unsafe-stack-size"; + if (auto &N = Existing->getOperand(0)) { + if (cast<MDString>(N.get())->getString() == MetadataName) { + if (auto &Op = Existing->getOperand(1)) { + auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue(); + FrameInfo.setUnsafeStackSize(Val); + } + } + } +} + // Pin the vtable to this file. void MachineFunction::Delegate::anchor() {} @@ -133,11 +152,11 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->deleteMachineBasicBlock(MBB); } -static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, +static inline Align getFnStackAlignment(const TargetSubtargetInfo *STI, const Function &F) { if (auto MA = F.getFnStackAlign()) - return MA->value(); - return STI->getFrameLowering()->getStackAlign().value(); + return *MA; + return STI->getFrameLowering()->getStackAlign(); } MachineFunction::MachineFunction(Function &F, const LLVMTargetMachine &Target, @@ -177,6 +196,8 @@ void MachineFunction::init() { /*ForcedRealign=*/CanRealignSP && F.hasFnAttribute(Attribute::StackAlignment)); + setUnsafeStackSize(F, *FrameInfo); + if (F.hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(*F.getFnStackAlign()); @@ -208,9 +229,7 @@ void MachineFunction::init() { "Can't create a MachineFunction using a Module with a " "Target-incompatible DataLayout attached\n"); - PSVManager = - std::make_unique<PseudoSourceValueManager>(*(getSubtarget(). - getInstrInfo())); + PSVManager = std::make_unique<PseudoSourceValueManager>(getTarget()); } MachineFunction::~MachineFunction() { @@ -837,25 +856,6 @@ void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) { LP.TypeIds.push_back(0); } -void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad, - const Function *Filter, - const BlockAddress *RecoverBA) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Filter; - Handler.RecoverBA = RecoverBA; - LP.SEHHandlers.push_back(Handler); -} - -void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad, - const Function *Cleanup) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - SEHHandler Handler; - Handler.FilterOrFinally = Cleanup; - Handler.RecoverBA = nullptr; - LP.SEHHandlers.push_back(Handler); -} - void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites) { LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); @@ -1012,7 +1012,32 @@ void MachineFunction::substituteDebugValuesForInst(const MachineInstr &Old, } } -auto MachineFunction::salvageCopySSA(MachineInstr &MI) +auto MachineFunction::salvageCopySSA( + MachineInstr &MI, DenseMap<Register, DebugInstrOperandPair> &DbgPHICache) + -> DebugInstrOperandPair { + const TargetInstrInfo &TII = *getSubtarget().getInstrInfo(); + + // Check whether this copy-like instruction has already been salvaged into + // an operand pair. 
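+ // The cache is keyed on the copy's destination register, so every debug
+ // ref that resolves to the same copy shares one DBG_PHI and instruction
+ // number instead of minting a fresh one each time.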
+ Register Dest; + if (auto CopyDstSrc = TII.isCopyInstr(MI)) { + Dest = CopyDstSrc->Destination->getReg(); + } else { + assert(MI.isSubregToReg()); + Dest = MI.getOperand(0).getReg(); + } + + auto CacheIt = DbgPHICache.find(Dest); + if (CacheIt != DbgPHICache.end()) + return CacheIt->second; + + // Calculate the instruction number to use, or install a DBG_PHI. + auto OperandPair = salvageCopySSAImpl(MI); + DbgPHICache.insert({Dest, OperandPair}); + return OperandPair; +} + +auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI) -> DebugInstrOperandPair { MachineRegisterInfo &MRI = getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); @@ -1141,26 +1166,13 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI) MachineBasicBlock &InsertBB = *CurInst->getParent(); // We reached the start of the block before finding a defining instruction. - // It could be from a constant register, otherwise it must be an argument. - if (TRI.isConstantPhysReg(State.first)) { - // We can produce a DBG_PHI that identifies the constant physreg. Doesn't - // matter where we put it, as it's constant valued. - assert(CurInst->isCopy()); - } else if (State.first == TRI.getFrameRegister(*this)) { - // LLVM IR is allowed to read the framepointer by calling a - // llvm.frameaddress.* intrinsic. We can support this by emitting a - // DBG_PHI $fp. This isn't ideal, because it extends the behaviours / - // position that DBG_PHIs appear at, limiting what can be done later. - // TODO: see if there's a better way of expressing these variable - // locations. - ; - } else { - // Assert that this is the entry block, or an EH pad. If it isn't, then - // there is some code construct we don't recognise that deals with physregs - // across blocks. - assert(!State.first.isVirtual()); - assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad()); - } + // There are numerous scenarios where this can happen: + // * Constant physical registers, + // * Several intrinsics that allow LLVM-IR to read arbitrary registers, + // * Arguments in the entry block, + // * Exception handling landing pads. + // Validating all of them is too difficult, so just insert a DBG_PHI reading + // the variable value at this position, rather than checking that it makes + // sense. // Create DBG_PHI for specified physreg. auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(), @@ -1181,6 +1193,7 @@ void MachineFunction::finalizeDebugInstrRefs() { MI.getOperand(1).ChangeToRegister(0, false); }; + DenseMap<Register, DebugInstrOperandPair> ArgDbgPHIs; for (auto &MBB : *this) { for (auto &MI : MBB) { if (!MI.isDebugRef() || !MI.getOperand(0).isReg()) continue; @@ -1203,7 +1216,7 @@ // instruction that defines the source value, see salvageCopySSA docs // for why this is important.
if (DefMI.isCopyLike() || TII->isCopyInstr(DefMI)) { - auto Result = salvageCopySSA(DefMI); + auto Result = salvageCopySSA(DefMI, ArgDbgPHIs); MI.getOperand(0).ChangeToImmediate(Result.first); MI.getOperand(1).setImm(Result.second); } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp index 16cde1f601f9..99494122d608 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp index 0e0eb8b8e00f..81c97ba6a086 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineFunctionSplitter.cpp @@ -24,7 +24,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -34,7 +33,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -82,7 +80,7 @@ static bool isColdBlock(const MachineBasicBlock &MBB, const MachineBlockFrequencyInfo *MBFI, ProfileSummaryInfo *PSI) { Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); - if (!Count.hasValue()) + if (!Count) return true; if (PercentileCutoff > 0) { @@ -108,9 +106,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { // We don't want to proceed further for cold functions // or functions of unknown hotness. Lukewarm functions have no prefix. 
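// (An empty Optional from getSectionPrefix() below therefore means
//  lukewarm, and control falls through to the splitting logic.)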
Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); - if (SectionPrefix.hasValue() && - (SectionPrefix.getValue().equals("unlikely") || - SectionPrefix.getValue().equals("unknown"))) { + if (SectionPrefix && (SectionPrefix.getValue().equals("unlikely") || + SectionPrefix.getValue().equals("unknown"))) { return false; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp index 85b266afceef..31f45e194a97 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstr.cpp @@ -11,19 +11,14 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -38,42 +33,30 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> -#include <cstddef> #include <cstdint> #include <cstring> -#include <iterator> #include <utility> using namespace llvm; @@ -163,19 +146,13 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { return nullptr; } -/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in -/// this instruction from their respective use lists. This requires that the -/// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { +void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) { for (MachineOperand &MO : operands()) if (MO.isReg()) MRI.removeRegOperandFromUseList(&MO); } -/// AddRegOperandsToUseLists - Add all of the register operands in -/// this instruction from their respective use lists. This requires that the -/// operands not be on their use lists yet. 
-void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { +void MachineInstr::addRegOperandsToUseLists(MachineRegisterInfo &MRI) { for (MachineOperand &MO : operands()) if (MO.isReg()) MRI.addRegOperandToUseList(&MO); @@ -232,16 +209,12 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } -#ifndef NDEBUG - bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata || - Op.getType() == MachineOperand::MO_MCSymbol; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. - assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands() || isDebugOp) && + assert((MCID->isVariadic() || OpNo < MCID->getNumOperands() || + Op.isValidExcessOperand()) && "Trying to add an operand to a machine instr that is already done!"); -#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -300,10 +273,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } -/// RemoveOperand - Erase an operand from an instruction, leaving it with one -/// fewer operand than it started with. -/// -void MachineInstr::RemoveOperand(unsigned OpNo) { +void MachineInstr::removeOperand(unsigned OpNo) { assert(OpNo < getNumOperands() && "Invalid operand number"); untieRegOperand(OpNo); @@ -1401,11 +1371,10 @@ bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const { continue; // A load from a constant PseudoSourceValue is invariant. - if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) + if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { if (PSV->isConstant(&MFI)) continue; - - if (const Value *V = MMO->getValue()) { + } else if (const Value *V = MMO->getValue()) { // If we have an AliasAnalysis, ask it whether the memory is constant. 
if (AA && AA->pointsToConstantMemory( @@ -1904,7 +1873,7 @@ bool MachineInstr::addRegisterKilled(Register IncomingReg, unsigned OpIdx = DeadOps.back(); if (getOperand(OpIdx).isImplicit() && (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) - RemoveOperand(OpIdx); + removeOperand(OpIdx); else getOperand(OpIdx).setIsKill(false); DeadOps.pop_back(); @@ -1969,7 +1938,7 @@ bool MachineInstr::addRegisterDead(Register Reg, unsigned OpIdx = DeadOps.back(); if (getOperand(OpIdx).isImplicit() && (!isInlineAsm() || findInlineAsmFlagIdx(OpIdx) < 0)) - RemoveOperand(OpIdx); + removeOperand(OpIdx); else getOperand(OpIdx).setIsDead(false); DeadOps.pop_back(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp index 759cff179790..2f1d7b976264 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -16,7 +16,8 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include <utility> using namespace llvm; @@ -109,7 +110,7 @@ bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI) { for (auto MII = FirstMI; MII != LastMI; ++MII) - if (MII->getDebugLoc().get()) + if (MII->getDebugLoc()) return MII->getDebugLoc(); return DebugLoc(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp index 500cf8e0b79b..00d75f8231c7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLICM.cpp @@ -240,7 +240,7 @@ namespace { void ExitScopeIfDone( MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); + const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); void HoistOutOfLoop(MachineDomTreeNode *HeaderN); @@ -696,19 +696,16 @@ void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) { /// destroy ancestors which are now done. void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, - DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + const DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { if (OpenChildren[Node]) return; - // Pop scope. - ExitScope(Node->getBlock()); - - // Now traverse upwards to pop ancestors whose offsprings are all done. - while (MachineDomTreeNode *Parent = ParentMap[Node]) { - unsigned Left = --OpenChildren[Parent]; - if (Left != 0) + for (;;) { + ExitScope(Node->getBlock()); + // Now traverse upwards to pop ancestors whose offspring are all done.
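+ // (lookup() below, rather than operator[], is what lets ParentMap be
+ // passed by const reference; a node with no parent yields nullptr and
+ // ends the walk.)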
+ MachineDomTreeNode *Parent = ParentMap.lookup(Node); + if (!Parent || --OpenChildren[Parent] != 0) break; - ExitScope(Parent->getBlock()); Node = Parent; } } @@ -999,6 +996,9 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { if (I.isConvergent()) return false; + if (!TII->shouldHoist(I, CurLoop)) + return false; + return true; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp index 9b96bc5e5e7f..5cbded4b9264 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -17,13 +17,12 @@ #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp index fdcc8472f1c2..0e8335d4974d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -64,7 +63,11 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, if (Use.getParent()->getParent() != Loop) Uses.push_back(&Use); for (auto *Use : Uses) { - MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg())); + assert(ConstrainRegClass && + "Expected a valid constrained register class!"); + (void)ConstrainRegClass; Use->setReg(R); } } @@ -90,25 +93,24 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, if (Remaps.count(R)) R = Remaps[R]; OrigPhi.getOperand(InitRegIdx).setReg(R); - MI.RemoveOperand(LoopRegIdx + 1); - MI.RemoveOperand(LoopRegIdx + 0); + MI.removeOperand(LoopRegIdx + 1); + MI.removeOperand(LoopRegIdx + 0); } else { // When peeling back, the initial value is the loop-carried value from // the original loop. 
Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg(); MI.getOperand(LoopRegIdx).setReg(LoopReg); - MI.RemoveOperand(InitRegIdx + 1); - MI.RemoveOperand(InitRegIdx + 0); + MI.removeOperand(InitRegIdx + 1); + MI.removeOperand(InitRegIdx + 0); } } DebugLoc DL; if (Direction == LPD_Front) { - Preheader->replaceSuccessor(Loop, NewBB); + Preheader->ReplaceUsesOfBlockWith(Loop, NewBB); NewBB->addSuccessor(Loop); Loop->replacePhiUsesWith(Preheader, NewBB); - if (TII->removeBranch(*Preheader) > 0) - TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL); + Preheader->updateTerminator(Loop); TII->removeBranch(*NewBB); TII->insertBranch(*NewBB, Loop, nullptr, {}, DL); } else { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp index 31d4fc7d02bf..23d55a5df9f5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -7,27 +7,18 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Value.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -40,174 +31,24 @@ using namespace llvm; using namespace llvm::dwarf; +static cl::opt<bool> + DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + // Out of line virtual method. MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; -namespace llvm { - -class MMIAddrLabelMapCallbackPtr final : CallbackVH { - MMIAddrLabelMap *Map = nullptr; - -public: - MMIAddrLabelMapCallbackPtr() = default; - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} - - void setPtr(BasicBlock *BB) { - ValueHandleBase::operator=(BB); - } - - void setMap(MMIAddrLabelMap *map) { Map = map; } - - void deleted() override; - void allUsesReplacedWith(Value *V2) override; -}; - -class MMIAddrLabelMap { - MCContext &Context; - struct AddrLabelSymEntry { - /// The symbols for the label. - TinyPtrVector<MCSymbol *> Symbols; - - Function *Fn; // The containing function of the BasicBlock. - unsigned Index; // The index in BBCallbacks for the BasicBlock. - }; - - DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; - - /// Callbacks for the BasicBlock's that we have entries for. We use this so - /// we get notified if a block is deleted or RAUWd. - std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; - - /// This is a per-function list of symbols whose corresponding BasicBlock got - /// deleted. These symbols need to be emitted at some point in the file, so - /// AsmPrinter emits them after the function body. 
- DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>> - DeletedAddrLabelsNeedingEmission; - -public: - MMIAddrLabelMap(MCContext &context) : Context(context) {} - - ~MMIAddrLabelMap() { - assert(DeletedAddrLabelsNeedingEmission.empty() && - "Some labels for deleted blocks never got emitted"); - } - - ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB); - - void takeDeletedSymbolsForFunction(Function *F, - std::vector<MCSymbol*> &Result); - - void UpdateForDeletedBlock(BasicBlock *BB); - void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); -}; - -} // end namespace llvm - -ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { - assert(BB->hasAddressTaken() && - "Shouldn't get label for block without address taken"); - AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; - - // If we already had an entry for this block, just return it. - if (!Entry.Symbols.empty()) { - assert(BB->getParent() == Entry.Fn && "Parent changed"); - return Entry.Symbols; - } - - // Otherwise, this is a new entry, create a new symbol for it and add an - // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. - BBCallbacks.emplace_back(BB); - BBCallbacks.back().setMap(this); - Entry.Index = BBCallbacks.size() - 1; - Entry.Fn = BB->getParent(); - MCSymbol *Sym = BB->hasAddressTaken() ? Context.createNamedTempSymbol() - : Context.createTempSymbol(); - Entry.Symbols.push_back(Sym); - return Entry.Symbols; -} - -/// If we have any deleted symbols for F, return them. -void MMIAddrLabelMap:: -takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { - DenseMap<AssertingVH<Function>, std::vector<MCSymbol*>>::iterator I = - DeletedAddrLabelsNeedingEmission.find(F); - - // If there are no entries for the function, just return. - if (I == DeletedAddrLabelsNeedingEmission.end()) return; - - // Otherwise, take the list. - std::swap(Result, I->second); - DeletedAddrLabelsNeedingEmission.erase(I); -} - -void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { - // If the block got deleted, there is no need for the symbol. If the symbol - // was already emitted, we can just forget about it, otherwise we need to - // queue it up for later emission when the function is output. - AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]); - AddrLabelSymbols.erase(BB); - assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?"); - BBCallbacks[Entry.Index] = nullptr; // Clear the callback. - - assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && - "Block/parent mismatch"); - - for (MCSymbol *Sym : Entry.Symbols) { - if (Sym->isDefined()) - return; - - // If the block is not yet defined, we need to emit it at the end of the - // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list - // for the containing Function. Since the block is being deleted, its - // parent may already be removed, we have to get the function from 'Entry'. - DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); - } -} - -void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { - // Get the entry for the RAUW'd block and remove it from our map. - AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]); - AddrLabelSymbols.erase(Old); - assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?"); - - AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; - - // If New is not address taken, just move our symbol over to it. 
- if (NewEntry.Symbols.empty()) { - BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. - NewEntry = std::move(OldEntry); // Set New's entry. - return; - } - - BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. - - // Otherwise, we need to add the old symbols to the new block's set. - llvm::append_range(NewEntry.Symbols, OldEntry.Symbols); -} - -void MMIAddrLabelMapCallbackPtr::deleted() { - Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); -} - -void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { - Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); -} - void MachineModuleInfo::initialize() { ObjFileMMI = nullptr; CurCallSite = 0; NextFnNum = 0; - UsesMSVCFloatingPoint = UsesMorestackAddr = false; - HasSplitStack = HasNosplitStack = false; - AddrLabelSymbols = nullptr; + UsesMSVCFloatingPoint = false; + DbgInfoAvailable = false; } void MachineModuleInfo::finalize() { Personalities.clear(); - delete AddrLabelSymbols; - AddrLabelSymbols = nullptr; - Context.reset(); // We don't clear the ExternalContext. @@ -219,16 +60,11 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) : TM(std::move(MMI.TM)), Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr, - nullptr, false), + &MMI.TM.Options.MCOptions, false), MachineFunctions(std::move(MMI.MachineFunctions)) { Context.setObjectFileInfo(MMI.TM.getObjFileLowering()); ObjFileMMI = MMI.ObjFileMMI; CurCallSite = MMI.CurCallSite; - UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint; - UsesMorestackAddr = MMI.UsesMorestackAddr; - HasSplitStack = MMI.HasSplitStack; - HasNosplitStack = MMI.HasNosplitStack; - AddrLabelSymbols = MMI.AddrLabelSymbols; ExternalContext = MMI.ExternalContext; TheModule = MMI.TheModule; } @@ -236,7 +72,7 @@ MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI) MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM) : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), - nullptr, nullptr, false) { + nullptr, &TM->Options.MCOptions, false) { Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); } @@ -245,7 +81,7 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MCContext *ExtContext) : TM(*TM), Context(TM->getTargetTriple(), TM->getMCAsmInfo(), TM->getMCRegisterInfo(), TM->getMCSubtargetInfo(), - nullptr, nullptr, false), + nullptr, &TM->Options.MCOptions, false), ExternalContext(ExtContext) { Context.setObjectFileInfo(TM->getObjFileLowering()); initialize(); @@ -253,25 +89,6 @@ MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM, MachineModuleInfo::~MachineModuleInfo() { finalize(); } -//===- Address of Block Management ----------------------------------------===// - -ArrayRef<MCSymbol *> -MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { - // Lazily create AddrLabelSymbols. - if (!AddrLabelSymbols) - AddrLabelSymbols = new MMIAddrLabelMap(getContext()); - return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); -} - -void MachineModuleInfo:: -takeDeletedSymbolsForFunction(const Function *F, - std::vector<MCSymbol*> &Result) { - // If no blocks have had their addresses taken, we're done. 
- if (!AddrLabelSymbols) return; - return AddrLabelSymbols-> - takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); -} - /// \name Exception Handling /// \{ @@ -318,6 +135,13 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { LastResult = nullptr; } +void MachineModuleInfo::insertFunction(const Function &F, + std::unique_ptr<MachineFunction> &&MF) { + auto I = MachineFunctions.insert(std::make_pair(&F, std::move(MF))); + assert(I.second && "machine function already mapped"); + (void)I; +} + namespace { /// This pass frees the MachineFunction object associated with a Function. @@ -409,7 +233,8 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { Ctx.diagnose( DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, LocCookie)); }); - MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + MMI.DbgInfoAvailable = !DisableDebugInfoPrinting && + !M.debug_compile_units().empty(); return false; } @@ -424,6 +249,7 @@ MachineModuleInfo MachineModuleAnalysis::run(Module &M, ModuleAnalysisManager &) { MachineModuleInfo MMI(TM); MMI.TheModule = &M; - MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); + MMI.DbgInfoAvailable = !DisableDebugInfoPrinting && + !M.debug_compile_units().empty(); return MMI; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp index 680dbe54ffaf..46ad1de78c46 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOperand.cpp @@ -14,9 +14,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/MIRFormatter.h" -#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 5347a7b0d890..631768ec986c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -53,10 +53,8 @@ void MachineOptimizationRemarkEmitter::emit( LLVMContext &Ctx = MF.getFunction().getContext(); // Only emit it if its hotness meets the threshold. - if (OptDiag.getHotness().getValueOr(0) < - Ctx.getDiagnosticsHotnessThreshold()) { + if (OptDiag.getHotness().value_or(0) < Ctx.getDiagnosticsHotnessThreshold()) return; - } Ctx.diagnose(OptDiag); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp index 7783b5e0d3cc..5da68abc8f6a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,6 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" @@ -82,9 +84,17 @@ using namespace llvm; using namespace ore; using namespace outliner; +// Statistics for outlined functions. 
STATISTIC(NumOutlined, "Number of candidates outlined"); STATISTIC(FunctionsCreated, "Number of functions created"); +// Statistics for instruction mapping. +STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector"); +STATISTIC(NumIllegalInUnsignedVec, + "Number of illegal instrs in unsigned vector"); +STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector"); +STATISTIC(UnsignedVecSize, "Size of unsigned vector"); + // Set to true if the user wants the outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -188,6 +198,8 @@ struct InstructionMapper { assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && "Tried to assign DenseMap tombstone or empty key to instruction."); + // Statistics. + ++NumLegalInUnsignedVec; return MINumber; } @@ -215,6 +227,8 @@ struct InstructionMapper { InstrListForMBB.push_back(It); UnsignedVecForMBB.push_back(IllegalInstrNumber); IllegalInstrNumber--; + // Statistics. + ++NumIllegalInUnsignedVec; assert(LegalInstrNumber < IllegalInstrNumber && "Instruction mapping overflow!"); @@ -293,6 +307,7 @@ struct InstructionMapper { case InstrType::Invisible: // Normally this is set by mapTo(Blah)Unsigned, but we just want to // skip this instruction. So, unset the flag here. + ++NumInvisible; AddedIllegalLastTime = false; break; } @@ -623,6 +638,15 @@ MachineFunction *MachineOutliner::createOutlinedFunction( TII.mergeOutliningCandidateAttributes(*F, OF.Candidates); + // Set uwtable, so we generate eh_frame. + UWTableKind UW = std::accumulate( + OF.Candidates.cbegin(), OF.Candidates.cend(), UWTableKind::None, + [](UWTableKind K, const outliner::Candidate &C) { + return std::max(K, C.getMF()->getFunction().getUWTableKind()); + }); + if (UW != UWTableKind::None) + F->setUWTableKind(UW); + BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); IRBuilder<> Builder(EntryBB); Builder.CreateRetVoid(); @@ -641,17 +665,20 @@ MachineFunction *MachineOutliner::createOutlinedFunction( ++I) { if (I->isDebugInstr()) continue; - MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + + // Don't keep debug information for outlined instructions. + auto DL = DebugLoc(); if (I->isCFIInstruction()) { - unsigned CFIIndex = NewMI->getOperand(0).getCFIIndex(); + unsigned CFIIndex = I->getOperand(0).getCFIIndex(); MCCFIInstruction CFI = Instrs[CFIIndex]; - (void)MF.addFrameInst(CFI); + BuildMI(MBB, MBB.end(), DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFI)); + } else { + MachineInstr *NewMI = MF.CloneMachineInstr(&*I); + NewMI->dropMemRefs(MF); + NewMI->setDebugLoc(DL); + MBB.insert(MBB.end(), NewMI); } - NewMI->dropMemRefs(MF); - - // Don't keep debug information for outlined instructions. - NewMI->setDebugLoc(DebugLoc()); - MBB.insert(MBB.end(), NewMI); } // Set normal properties for a late MachineFunction. @@ -831,9 +858,10 @@ bool MachineOutliner::outline(Module &M, MBB.erase(std::next(StartIt), std::next(EndIt)); // Keep track of what we removed by marking them all as -1. - std::for_each(Mapper.UnsignedVec.begin() + C.getStartIdx(), - Mapper.UnsignedVec.begin() + C.getEndIdx() + 1, - [](unsigned &I) { I = static_cast<unsigned>(-1); }); + for (unsigned &I : + llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(), + Mapper.UnsignedVec.begin() + C.getEndIdx() + 1)) + I = static_cast<unsigned>(-1); OutlinedSomething = true; // Statistics. 
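An aside on the unwind-table hunk in createOutlinedFunction above: the std::accumulate call folds std::max over every candidate, so the outlined function inherits the most demanding UWTableKind of any function it was extracted from, and eh_frame is generated whenever at least one caller needs it. A minimal standalone sketch of the same fold, with simplified stand-ins for the LLVM types (the UWTableKind and Candidate below only loosely mirror llvm::UWTableKind and outliner::Candidate):

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

// Stand-in for llvm::UWTableKind; the enumerators are ordered so that
// std::max picks the most demanding unwind-table requirement.
enum class UWTableKind { None = 0, Sync = 1, Async = 2 };

// Stand-in for outliner::Candidate; Kind plays the role of
// C.getMF()->getFunction().getUWTableKind().
struct Candidate {
  UWTableKind Kind;
};

int main() {
  std::vector<Candidate> Candidates{
      {UWTableKind::None}, {UWTableKind::Async}, {UWTableKind::Sync}};
  // Same shape as the fold in the patch: start from None, keep the maximum.
  UWTableKind UW = std::accumulate(
      Candidates.cbegin(), Candidates.cend(), UWTableKind::None,
      [](UWTableKind K, const Candidate &C) { return std::max(K, C.Kind); });
  std::cout << static_cast<int>(UW) << '\n'; // prints 2: Async wins
}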
@@ -896,6 +924,9 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, // MBB is suitable for outlining. Map it to a list of unsigneds. Mapper.convertToUnsignedVec(MBB, *TII); } + + // Statistics. + UnsignedVecSize = Mapper.UnsignedVec.size(); } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp index 762395542b40..8d500398f55e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -29,6 +29,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -43,6 +44,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LiveIntervals.h" @@ -55,7 +57,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachinePipeliner.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ModuloSchedule.h" #include "llvm/CodeGen/RegisterPressure.h" @@ -66,7 +67,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" @@ -109,7 +109,6 @@ STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages"); /// A command line option to turn software pipelining on or off. static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), - cl::ZeroOrMore, cl::desc("Enable Software Pipelining")); /// A command line option to enable SWP at -Os. @@ -147,8 +146,8 @@ static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1)); #endif static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii", - cl::ReallyHidden, cl::init(false), - cl::ZeroOrMore, cl::desc("Ignore RecMII")); + cl::ReallyHidden, + cl::desc("Ignore RecMII")); static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false)); @@ -169,10 +168,9 @@ static cl::opt<bool> ExperimentalCodeGen( namespace llvm { // A command line option to enable the CopyToPhi DAG mutation. 
-cl::opt<bool> - SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, - cl::init(true), cl::ZeroOrMore, - cl::desc("Enable CopyToPhi DAG Mutation")); +cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, + cl::init(true), + cl::desc("Enable CopyToPhi DAG Mutation")); } // end namespace llvm @@ -255,6 +253,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { << "Failed to pipeline loop"; }); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -262,6 +261,7 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) { Changed = swingModuloScheduler(L); + LI.LoopPipelinerInfo.reset(); return Changed; } @@ -354,7 +354,8 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) { LI.LoopInductionVar = nullptr; LI.LoopCompare = nullptr; - if (!TII->analyzeLoopForPipelining(L.getTopBlock())) { + LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock()); + if (!LI.LoopPipelinerInfo) { LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n"); NumFailLoop++; ORE->emit([&]() { @@ -419,7 +420,7 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) { assert(L.getBlocks().size() == 1 && "SMS works on single blocks only."); SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo, - II_setByPragma); + II_setByPragma, LI.LoopPipelinerInfo.get()); MachineBasicBlock *MBB = L.getHeader(); // The kernel should not include any terminator instructions. These @@ -513,7 +514,7 @@ void SwingSchedulerDAG::schedule() { // Don't pipeline large loops. if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) { LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii - << ", we don't pipleline large loops\n"); + << ", we don't pipeline large loops\n"); NumFailLargeMaxMII++; Pass.ORE->emit([&]() { return MachineOptimizationRemarkAnalysis( @@ -1297,8 +1298,7 @@ bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets, for (auto W : AdjK[V]) { if (W < S) continue; - if (B[W].count(SV) == 0) - B[W].insert(SV); + B[W].insert(SV); } } Stack.pop_back(); @@ -1422,7 +1422,7 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { /// We ignore the back-edge recurrence in order to avoid unbounded recursion /// in the calculation of the ASAP, ALAP, etc functions. 
static bool ignoreDependence(const SDep &D, bool isPred) { - if (D.isArtificial()) + if (D.isArtificial() || D.getSUnit()->isBoundaryNode()) return true; return D.getKind() == SDep::Anti && isPred; } @@ -1471,6 +1471,8 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) { SUnit *SU = &SUnits[I]; for (const SDep &S : SU->Succs) { SUnit *succ = S.getSUnit(); + if (succ->isBoundaryNode()) + continue; if (S.getLatency() == 0) zeroLatencyHeight = std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1); @@ -1575,7 +1577,9 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path, return Path.contains(Cur); bool FoundPath = false; for (auto &SI : Cur->Succs) - FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); + if (!ignoreDependence(SI, false)) + FoundPath |= + computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited); for (auto &PI : Cur->Preds) if (PI.getKind() == SDep::Anti) FoundPath |= @@ -1663,7 +1667,7 @@ void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) { LLVM_DEBUG( dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") " << TRI->getRegPressureSetName(RPDelta.Excess.getPSet()) - << ":" << RPDelta.Excess.getUnitInc()); + << ":" << RPDelta.Excess.getUnitInc() << "\n"); NS.setExceedPressure(SU); break; } @@ -1718,7 +1722,7 @@ void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) { } /// Add the nodes that do not belong to a recurrence set into groups -/// based upon connected componenets. +/// based upon connected components. void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) { SetVector<SUnit *> NodesAdded; SmallPtrSet<SUnit *, 8> Visited; @@ -1788,7 +1792,8 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, NodesAdded.insert(SU); for (auto &SI : SU->Succs) { SUnit *Successor = SI.getSUnit(); - if (!SI.isArtificial() && NodesAdded.count(Successor) == 0) + if (!SI.isArtificial() && !Successor->isBoundaryNode() && + NodesAdded.count(Successor) == 0) addConnectedNodes(Successor, NewSet, NodesAdded); } for (auto &PI : SU->Preds) { @@ -1803,8 +1808,7 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet, static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2, SmallSetVector<SUnit *, 8> &Result) { Result.clear(); - for (unsigned i = 0, e = Set1.size(); i != e; ++i) { - SUnit *SU = Set1[i]; + for (SUnit *SU : Set1) { if (Set2.count(SU) != 0) Result.insert(SU); } @@ -2080,6 +2084,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) { }); } while (++NI != NE && scheduleFound); + // If a schedule is found, ensure non-pipelined instructions are in stage 0 + if (scheduleFound) + scheduleFound = + Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo); + // If a schedule is found, check if it is a valid schedule too. 
if (scheduleFound) scheduleFound = Schedule.isValidSchedule(this); @@ -2263,7 +2272,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) { bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc) { if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) || - Dep.isArtificial()) + Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode()) return false; if (!SwpPruneLoopCarried) @@ -2430,7 +2439,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) { while (!Worklist.empty()) { const SDep &Cur = Worklist.pop_back_val(); SUnit *SuccSU = Cur.getSUnit(); - if (Visited.count(SuccSU)) + if (Visited.count(SuccSU) || SuccSU->isBoundaryNode()) continue; std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU); if (it == InstrToCycle.end()) @@ -2697,21 +2706,91 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, return false; } +/// Determine transitive dependences of unpipelineable instructions +SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet<SUnit *, 8> DoNotPipeline; + SmallVector<SUnit *, 8> Worklist; + + for (auto &SU : SSD->SUnits) + if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr())) + Worklist.push_back(&SU); + + while (!Worklist.empty()) { + auto SU = Worklist.pop_back_val(); + if (DoNotPipeline.count(SU)) + continue; + LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n"); + DoNotPipeline.insert(SU); + for (auto &Dep : SU->Preds) + Worklist.push_back(Dep.getSUnit()); + if (SU->getInstr()->isPHI()) + for (auto &Dep : SU->Succs) + if (Dep.getKind() == SDep::Anti) + Worklist.push_back(Dep.getSUnit()); + } + return DoNotPipeline; +} + +// Determine all instructions upon which any unpipelineable instruction depends +// and ensure that they are in stage 0. If unable to do so, return false. +bool SMSchedule::normalizeNonPipelinedInstructions( + SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) { + SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI); + + int NewLastCycle = INT_MIN; + for (SUnit &SU : SSD->SUnits) { + if (!SU.isInstr()) + continue; + if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) { + NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]); + continue; + } + + // Put the non-pipelined instruction as early as possible in the schedule + int NewCycle = getFirstCycle(); + for (auto &Dep : SU.Preds) + NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle); + + int OldCycle = InstrToCycle[&SU]; + if (OldCycle != NewCycle) { + InstrToCycle[&SU] = NewCycle; + auto &OldS = getInstructions(OldCycle); + llvm::erase_value(OldS, &SU); + getInstructions(NewCycle).emplace_back(&SU); + LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum + << ") is not pipelined; moving from cycle " << OldCycle + << " to " << NewCycle << " Instr:" << *SU.getInstr()); + } + NewLastCycle = std::max(NewLastCycle, NewCycle); + } + LastCycle = NewLastCycle; + return true; +} + // Check if the generated schedule is valid. This function checks if // an instruction that uses a physical register is scheduled in a // different stage than the definition. The pipeliner does not handle // physical register values that may cross a basic block boundary. +// Furthermore, if a physical def/use pair is assigned to the same +// cycle, orderDependence does not guarantee def/use ordering, so that +// case should be considered invalid. (The test checks for both +// earlier and same-cycle use to be more robust.) 
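// Editorial illustration (not part of the patch): a schedule the stage-only
// test wrongly accepted. Let SU(def) write physical register R0 and SU(use)
// read R0, with both placed in cycle 5 of stage 0. stageScheduled() is equal
// for the pair, so the old check passed, yet within cycle 5 orderDependence
// may emit SU(use) ahead of SU(def). The strengthened check below therefore
// also rejects InstrToCycle[use] <= InstrToCycle[def], i.e. uses scheduled in
// the same cycle as, or earlier than, their physical-register definition.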
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { for (SUnit &SU : SSD->SUnits) { if (!SU.hasPhysRegDefs) continue; int StageDef = stageScheduled(&SU); + int CycleDef = InstrToCycle[&SU]; assert(StageDef != -1 && "Instruction should have been scheduled."); for (auto &SI : SU.Succs) - if (SI.isAssignedRegDep()) - if (Register::isPhysicalRegister(SI.getReg())) + if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode()) + if (Register::isPhysicalRegister(SI.getReg())) { if (stageScheduled(SI.getSUnit()) != StageDef) return false; + if (InstrToCycle[SI.getSUnit()] <= CycleDef) + return false; + } } return true; } @@ -2998,7 +3077,7 @@ bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const { if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseduo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; }); return true; } @@ -3038,7 +3117,7 @@ void ResourceManager::reserveResources(const MCInstrDesc *MID) { if (!SCDesc->isValid()) { LLVM_DEBUG({ dbgs() << "No valid Schedule Class Desc for schedClass!\n"; - dbgs() << "isPseduo:" << MID->isPseudo() << "\n"; + dbgs() << "isPseudo:" << MID->isPseudo() << "\n"; }); return; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 1a4ad53ddf81..511bb80052c2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -651,3 +650,18 @@ bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const { } return false; } + +bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isArgumentRegister(MF, Reg); +} + +bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isFixedRegister(MF, Reg); +} + +bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp index 8db893535daf..01cea85ecc7c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSSAContext.cpp @@ -14,7 +14,9 @@ #include "llvm/CodeGen/MachineSSAContext.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp index b043d4c1b0c1..4e00a211713e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineScheduler.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include 
"llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -752,7 +751,7 @@ void ScheduleDAGMI::moveInstruction( } bool ScheduleDAGMI::checkSchedLimit() { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { CurrentTop = CurrentBottom; return false; @@ -920,12 +919,10 @@ void ScheduleDAGMI::placeDebugValues() { MachineBasicBlock::iterator OrigPrevMI = P.second; if (&*RegionBegin == DbgValue) ++RegionBegin; - BB->splice(++OrigPrevMI, BB, DbgValue); - if (OrigPrevMI == std::prev(RegionEnd)) + BB->splice(std::next(OrigPrevMI), BB, DbgValue); + if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd) RegionEnd = DbgValue; } - DbgValues.clear(); - FirstDbgValue = nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -2008,7 +2005,7 @@ void SchedBoundary::reset() { ReservedCycles.clear(); ReservedCyclesIndex.clear(); ResourceGroupSubUnitMasks.clear(); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is // reserved (SchedBoundary::ReservedCycles). @@ -2196,7 +2193,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) { unsigned NRCycle, InstanceIdx; std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); if (NRCycle > CurrCycle) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " @@ -2263,7 +2260,7 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, unsigned Idx) { assert(SU->getInstr() && "Scheduled SUnit must have instr"); -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS // ReadyCycle was been bumped up to the CurrCycle when this node was // scheduled, but CurrCycle may have been eagerly advanced immediately after // scheduling, so may now be greater than ReadyCycle. 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index bc03776bde19..006ba9273dfb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -16,19 +16,20 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,18 +96,18 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold( cl::init(20), cl::Hidden); static cl::opt<bool> -SinkInstsIntoLoop("sink-insts-to-avoid-spills", - cl::desc("Sink instructions into loops to avoid " - "register spills"), - cl::init(false), cl::Hidden); - -static cl::opt<unsigned> SinkIntoLoopLimit( - "machine-sink-loop-limit", - cl::desc("The maximum number of instructions considered for loop sinking."), + SinkInstsIntoCycle("sink-insts-to-avoid-spills", + cl::desc("Sink instructions into cycles to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> SinkIntoCycleLimit( + "machine-sink-cycle-limit", + cl::desc("The maximum number of instructions considered for cycle sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); -STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); +STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); @@ -119,7 +120,7 @@ namespace { MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree - MachineLoopInfo *LI; + MachineCycleInfo *CI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; @@ -180,8 +181,9 @@ namespace { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); - AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineCycleInfoWrapperPass>(); AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addPreserved<MachineCycleInfoWrapperPass>(); AU.addPreserved<MachineLoopInfo>(); if (UseBlockFreqInfo) AU.addRequired<MachineBlockFrequencyInfo>(); @@ -232,9 +234,9 @@ namespace { MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); - void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, - SmallVectorImpl<MachineInstr *> &Candidates); - bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); + void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, + SmallVectorImpl<MachineInstr *> &Candidates); + bool 
SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -261,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) @@ -378,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, +void MachineSinking::FindCycleSinkCandidates( + MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates) { for (auto &MI : *BB) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); continue; } - if (!L->isLoopInvariant(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); + if (!isCycleInvariant(Cycle, MI)) { + LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) @@ -409,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B if (!MRI->hasOneDef(MO.getReg())) continue; - LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } @@ -425,22 +428,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); PDT = &getAnalysis<MachinePostDominatorTree>(); - LI = &getAnalysis<MachineLoopInfo>(); + CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); - // MachineSink currently uses MachineLoopInfo, which only recognizes natural - // loops. As such, we could sink instructions into irreducible cycles, which - // would be non-profitable. - // WARNING: The current implementation of hasStoreBetween() is incorrect for - // sinking into irreducible cycles (PR53990), this bailout is currently - // necessary for correctness, not just profitability. 
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); - if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) - return false; - bool EverMadeChange = false; while (true) { @@ -473,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { EverMadeChange = true; } - if (SinkInstsIntoLoop) { - SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end()); - for (auto *L : Loops) { - MachineBasicBlock *Preheader = LI->findLoopPreheader(L); + if (SinkInstsIntoCycle) { + SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(), + CI->toplevel_end()); + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); if (!Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); continue; } SmallVector<MachineInstr *, 8> Candidates; - FindLoopSinkCandidates(L, Preheader, Candidates); + FindCycleSinkCandidates(Cycle, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoLoopLimit) { - LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " "be analysed."); break; } - if (!SinkIntoLoop(L, *I)) + if (!SinkIntoCycle(Cycle, *I)) break; EverMadeChange = true; - ++NumLoopSunk; + ++NumCycleSunk; } } } @@ -520,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an - // unreachable loop there may be nowhere to stop. + // unreachable cycle there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; - // Cache all successors, sorted by frequency info and loop depth. + // Cache all successors, sorted by frequency info and cycle depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. @@ -644,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; - // Avoid breaking back edge. From == To means backedge for single BB loop. + // Avoid breaking back edge. From == To means backedge for single BB cycle. if (!SplitEdges || FromBB == ToBB) return false; - // Check for backedges of more "complex" loops. - if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && - LI->isLoopHeader(ToBB)) + MachineCycle *FromCycle = CI->getCycle(FromBB); + MachineCycle *ToCycle = CI->getCycle(ToBB); + + // Check for backedges of more "complex" cycles. + if (FromCycle == ToCycle && FromCycle && + (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB)) return false; // It's not always legal to break critical edges and sink the computation @@ -753,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, if (!PDT->dominates(SuccToSinkTo, MBB)) return true; - // It is profitable to sink an instruction from a deeper loop to a shallower - // loop, even if the latter post-dominates the former (PR21115). 
-  if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+  // It is profitable to sink an instruction from a deeper cycle to a shallower
+  // cycle, even if the latter post-dominates the former (PR21115).
+  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
     return true;
 
   // Check if only use in post dominated block is PHI instruction.
@@ -776,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
           FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
     return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
 
-  MachineLoop *ML = LI->getLoopFor(MBB);
+  MachineCycle *MCycle = CI->getCycle(MBB);
 
-  // If the instruction is not inside a loop, it is not profitable to sink MI to
+  // If the instruction is not inside a cycle, it is not profitable to sink MI to
   // a post dominate block SuccToSinkTo.
-  if (!ML)
+  if (!MCycle)
     return false;
 
   auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
@@ -798,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
     return false;
   };
 
-  // If this instruction is inside a loop and sinking this instruction can make
+  // If this instruction is inside a cycle and sinking this instruction can make
   // more registers live range shorten, it is still prifitable.
   for (const MachineOperand &MO : MI.operands()) {
     // Ignore non-register operands.
@@ -826,14 +823,17 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
       return false;
     } else {
       MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      // DefMI is defined outside of loop. There should be no live range
-      // impact for this operand. Defination outside of loop means:
-      // 1: defination is outside of loop.
-      // 2: defination is in this loop, but it is a PHI in the loop header.
-      if (LI->getLoopFor(DefMI->getParent()) != ML ||
-          (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+      if (!DefMI)
+        continue;
+      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
+      // DefMI is defined outside of the cycle. There should be no live range
+      // impact for this operand. A definition outside of the cycle means:
+      // 1: the definition is outside of the cycle.
+      // 2: the definition is in this cycle, but it is a PHI in the cycle header.
+      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
+                              Cycle->getHeader() == DefMI->getParent()))
         continue;
-      // The DefMI is defined inside the loop.
+      // The DefMI is defined inside the cycle.
       // If sinking this operand makes some register pressure set exceed limit,
       // it is not profitable.
       if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
@@ -843,8 +843,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
     }
   }
 
-  // If MI is in loop and all its operands are alive across the whole loop or if
-  // no operand sinking make register pressure set exceed limit, it is
+  // If MI is in a cycle and all its operands are alive across the whole cycle,
+  // or no operand sinking makes a register pressure set exceed the limit, it is
   // profitable to sink MI.
   return true;
 }
@@ -876,14 +876,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
       AllSuccs.push_back(DTChild->getBlock());
   }
 
-  // Sort Successors according to their loop depth or block frequency info.
+  // Sort Successors according to their cycle depth or block frequency info.
   llvm::stable_sort(
       AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
         uint64_t LHSFreq = MBFI ?
MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq - : LI->getLoopDepth(L) < LI->getLoopDepth(R); + : CI->getCycleDepth(L) < CI->getCycleDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); @@ -898,7 +898,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); - // Loop over all the operands of the specified instruction. If there is + // loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. // SuccToSinkTo - This is the successor to sink this instruction to, once we @@ -945,7 +945,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies - // higher priority, otherwise prioritize smaller loop depths. + // higher priority, otherwise prioritize smaller cycle depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; @@ -968,7 +968,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, } // It is not possible to sink an instruction into its own block. This can - // happen with loops. + // happen with cycles. if (MBB == SuccToSinkTo) return nullptr; @@ -1093,8 +1093,7 @@ using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>; /// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<MIRegs> &DbgValuesToSink) { - + ArrayRef<MIRegs> DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. @@ -1113,7 +1112,7 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. - for (auto DbgValueToSink : DbgValuesToSink) { + for (const auto &DbgValueToSink : DbgValuesToSink) { MachineInstr *DbgMI = DbgValueToSink.first; MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); @@ -1178,7 +1177,7 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, // If this BB is too big or the block number in straight line between From // and To is too big, stop searching to save compiling time. - if (BB->size() > SinkLoadInstsPerBlockThreshold || + if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) || HandledDomBlocks.size() > SinkLoadBlocksThreshold) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) @@ -1223,69 +1222,78 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into loops if profitable. This especially tries to prevent -/// register spills caused by register pressure if there is little to no -/// overhead moving instructions into loops. 
-bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); - MachineBasicBlock *Preheader = L->getLoopPreheader(); - assert(Preheader && "Loop sink needs a preheader block"); +/// Sink instructions into cycles if profitable. This especially tries to +/// prevent register spills caused by register pressure if there is little to no +/// overhead moving instructions into cycles. +bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { + LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + assert(Preheader && "Cycle sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); - if (!L->contains(&MI)) { - LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); + if (!Cycle->contains(MI.getParent())) { + LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop + // the instruction (and thus possibly executing it on every cycle // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " + LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); CanSink = false; break; } - LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << + LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { - LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); + LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { - LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); + LLVM_DEBUG( + dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); return false; } - if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { - LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); + if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) { + LLVM_DEBUG( + dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); return false; } - LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); + SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, + I); + + // 
Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : I.operands()) { + if (MO.isReg() && MO.readsReg()) + RegsToClearKillFlags.insert(MO.getReg()); + } // The instruction is moved from its basic block, so do not retain the // debug information. @@ -1294,6 +1302,45 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { return true; } +/// Return true if a target defined block prologue instruction interferes +/// with a sink candidate. +static bool blockPrologueInterferes(MachineBasicBlock *BB, + MachineBasicBlock::iterator End, + MachineInstr &MI, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + const MachineRegisterInfo *MRI) { + if (BB->begin() == End) + return false; // no prologue + for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) { + // Only check target defined prologue instructions + if (!TII->isBasicBlockPrologue(*PI)) + continue; + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + if (Register::isPhysicalRegister(Reg) && + (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg)))) + continue; + if (PI->modifiesRegister(Reg, TRI)) + return true; + } else { + if (PI->readsRegister(Reg, TRI)) + return true; + // Check for interference with non-dead defs + auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI); + if (DefOp && !DefOp->isDead()) + return true; + } + } + } + return false; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -1368,9 +1415,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } - // Don't sink instructions into a loop. - if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { - LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + // Don't sink instructions into a cycle. + if (!TryBreak && CI->getCycle(SuccToSinkTo) && + (!CI->getCycle(SuccToSinkTo)->isReducible() || + CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) { + LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n"); TryBreak = true; } @@ -1405,9 +1454,12 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, } // Determine where to insert into. Skip phi nodes. - MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) - ++InsertPos; + MachineBasicBlock::iterator InsertPos = + SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin()); + if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) { + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n"); + return false; + } // Collect debug users of any vreg that this inst defines. SmallVector<MIRegs, 4> DbgUsersToSink; @@ -1696,14 +1748,6 @@ static bool hasRegisterDependency(MachineInstr *MI, return HasRegDependency; } -static SmallSet<MCRegister, 4> getRegUnits(MCRegister Reg, - const TargetRegisterInfo *TRI) { - SmallSet<MCRegister, 4> RegUnits; - for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) - RegUnits.insert(*RI); - return RegUnits; -} - bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1749,14 +1793,15 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, } // Record debug use of each reg unit. 
- SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : RegUnits) - MIUnits[Reg].push_back(MO.getReg()); + for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); + ++RI) + MIUnits[*RI].push_back(MO.getReg()); } } if (IsValid) { - for (auto RegOps : MIUnits) - SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second}); + for (auto &RegOps : MIUnits) + SeenDbgInstrs[RegOps.first].emplace_back(&MI, + std::move(RegOps.second)); } continue; } @@ -1803,22 +1848,29 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, if (!MO.isReg() || !MO.isDef()) continue; - SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI); - for (MCRegister Reg : Units) { - for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) { + for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) { + for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) { auto &Regs = DbgValsToSinkMap[MIRegs.first]; for (unsigned Reg : MIRegs.second) Regs.push_back(Reg); } } } - SmallVector<MIRegs, 4> DbgValsToSink(DbgValsToSinkMap.begin(), - DbgValsToSinkMap.end()); + auto DbgValsToSink = DbgValsToSinkMap.takeVector(); + + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); + + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); + if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) { + LLVM_DEBUG( + dbgs() << " *** Not sinking: prologue interference\n"); + continue; + } // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp index 0803c2b8b85a..a85dbf1de1ee 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStableHash.cpp @@ -12,29 +12,30 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineStableHash.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/CodeGen/MIRFormatter.h" -#include "llvm/CodeGen/MIRPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/StableHashing.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/IRPrintingPasses.h" -#include "llvm/IR/Instructions.h" -#include 
"llvm/IR/ModuleSlotTracker.h" -#include "llvm/MC/MCDwarf.h" -#include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/ErrorHandling.h" #define DEBUG_TYPE "machine-stable-hash" @@ -64,7 +65,10 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) { case MachineOperand::MO_Register: if (Register::isVirtualRegister(MO.getReg())) { const MachineRegisterInfo &MRI = MO.getParent()->getMF()->getRegInfo(); - return MRI.getVRegDef(MO.getReg())->getOpcode(); + SmallVector<unsigned> DefOpcodes; + for (auto &Def : MRI.def_instructions(MO.getReg())) + DefOpcodes.push_back(Def.getOpcode()); + return hash_combine_range(DefOpcodes.begin(), DefOpcodes.end()); } // Register operands don't have target flags. @@ -192,3 +196,21 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs, return stable_hash_combine_range(HashComponents.begin(), HashComponents.end()); } + +stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) { + SmallVector<stable_hash> HashComponents; + // TODO: Hash more stuff like block alignment and branch probabilities. + for (auto &MI : MBB) + HashComponents.push_back(stableHashValue(MI)); + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} + +stable_hash llvm::stableHashValue(const MachineFunction &MF) { + SmallVector<stable_hash> HashComponents; + // TODO: Hash lots more stuff like function alignment and stack objects. + for (auto &MBB : MF) + HashComponents.push_back(stableHashValue(MBB)); + return stable_hash_combine_range(HashComponents.begin(), + HashComponents.end()); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp index 86cf4999d4b0..6128248a028e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineStripDebug.cpp @@ -10,10 +10,10 @@ /// tests can be debugified without affecting the output MIR. 
//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/Debugify.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp index c9d3e473062b..db04f2bcc095 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp @@ -32,10 +32,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRangeCalc.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -48,6 +48,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterBank.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -55,12 +57,14 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCTargetOptions.h" @@ -95,6 +99,7 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; + const RegisterBankInfo *RBI; unsigned foundErrors; @@ -370,6 +375,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { TM = &MF.getTarget(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); + RBI = MF.getSubtarget().getRegBankInfo(); MRI = &MF.getRegInfo(); const bool isFunctionFailedISel = MF.getProperties().hasProperty( @@ -442,7 +448,7 @@ unsigned MachineVerifier::verify(const MachineFunction &MF) { for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); if (Op.getParent() != &MI) { - // Make sure to use correct addOperand / RemoveOperand / ChangeTo + // Make sure to use correct addOperand / removeOperand / ChangeTo // functions when replacing operands of a MachineInstr. report("Instruction has operand with wrong parent set", &MI); } @@ -1000,17 +1006,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - if (MRI->getRegBankOrNull(Src) != MRI->getRegBankOrNull(Dst)) { - report( - Twine(OpcName, " source and destination register banks must match"), - MI); + const RegisterBank *SrcRB = RBI->getRegBank(Src, *MRI, *TRI); + const RegisterBank *DstRB = RBI->getRegBank(Dst, *MRI, *TRI); + + // Allow only the source bank to be set. 
+ if ((SrcRB && DstRB && SrcRB != DstRB) || (DstRB && !SrcRB)) { + report(Twine(OpcName, " cannot change register bank"), MI); break; } - if (MRI->getRegClassOrNull(Src) != MRI->getRegClassOrNull(Dst)) + // Don't allow a class change. Do allow member class->regbank. + const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Dst); + if (DstRC && DstRC != MRI->getRegClassOrNull(Src)) { report( Twine(OpcName, " source and destination register classes must match"), MI); + break; + } break; } @@ -1072,6 +1084,18 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (ValTy.getSizeInBytes() < MMO.getSize()) report("store memory size cannot exceed value size", MI); } + + const AtomicOrdering Order = MMO.getSuccessOrdering(); + if (Opc == TargetOpcode::G_STORE) { + if (Order == AtomicOrdering::Acquire || + Order == AtomicOrdering::AcquireRelease) + report("atomic store cannot use acquire ordering", MI); + + } else { + if (Order == AtomicOrdering::Release || + Order == AtomicOrdering::AcquireRelease) + report("atomic load cannot use release ordering", MI); + } } break; @@ -1628,6 +1652,43 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { verifyAllRegOpsScalar(*MI, *MRI); break; } + case TargetOpcode::G_IS_FPCLASS: { + LLT DestTy = MRI->getType(MI->getOperand(0).getReg()); + LLT DestEltTy = DestTy.getScalarType(); + if (!DestEltTy.isScalar()) { + report("Destination must be a scalar or vector of scalars", MI); + break; + } + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT SrcEltTy = SrcTy.getScalarType(); + if (!SrcEltTy.isScalar()) { + report("Source must be a scalar or vector of scalars", MI); + break; + } + if (!verifyVectorElementMatch(DestTy, SrcTy, MI)) + break; + const MachineOperand &TestMO = MI->getOperand(2); + if (!TestMO.isImm()) { + report("floating-point class set (operand 2) must be an immediate", MI); + break; + } + int64_t Test = TestMO.getImm(); + if (Test < 0 || Test > fcAllFlags) { + report("Incorrect floating-point class set (operand 2)", MI); + break; + } + const MachineOperand &SemanticsMO = MI->getOperand(3); + if (!SemanticsMO.isImm()) { + report("floating-point semantics (operand 3) must be an immediate", MI); + break; + } + int64_t Semantics = SemanticsMO.getImm(); + if (Semantics < 0 || Semantics > APFloat::S_MaxSemantics) { + report("Incorrect floating-point semantics (operand 3)", MI); + break; + } + break; + } default: break; } @@ -1912,6 +1973,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugInstr()) checkLiveness(MO, MONum); + if (MO->isDef() && MO->isUndef() && !MO->getSubReg() && + MO->getReg().isVirtual()) // TODO: Apply to physregs too + report("Undef virtual register def operands require a subregister", MO, MONum); + // Verify the consistency of tied operands. 
if (MO->isTied()) { unsigned OtherIdx = MI->findTiedOperandIdx(MONum); @@ -2148,6 +2213,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } break; + case MachineOperand::MO_CFIIndex: + if (MO->getCFIIndex() >= MF->getFrameInstructions().size()) + report("CFI instruction has invalid index", MO, MONum); + break; + default: break; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp index b0760322064c..fa5df68b8abc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MacroFusion.cpp @@ -12,11 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MacroFusion.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp index f91a9d2c3a32..3245d9649be1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" @@ -157,7 +158,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { SmallVector<MachineBasicBlock *, 4> EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -239,11 +240,9 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, /// Generate the pipeline epilog code. The epilog code finishes the iterations /// that were started in either the prolog or the kernel. We create a basic /// block for each stage that needs to complete. -void ModuloScheduleExpander::generateEpilog(unsigned LastStage, - MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, - MBBVectorTy &PrologBBs) { +void ModuloScheduleExpander::generateEpilog( + unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, + ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -313,7 +312,12 @@ void ModuloScheduleExpander::generateEpilog(unsigned LastStage, // Create a branch to the new epilog from the kernel. // Remove the original branch and add a new branch to the epilog. 
TII->removeBranch(*KernelBB); - TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); + assert((OrigBB == TBB || OrigBB == FBB) && + "Unable to determine looping branch direction"); + if (OrigBB != TBB) + TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc()); + else + TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc()); // Add a branch to the loop exit. if (EpilogBBs.size() > 0) { MachineBasicBlock *LastEpilogBB = EpilogBBs.back(); @@ -813,8 +817,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) { break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) if (MI.getOperand(i + 1).getMBB() == Incoming) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); + MI.removeOperand(i + 1); + MI.removeOperand(i); break; } } @@ -846,7 +850,7 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB, Optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond); unsigned numAdded = 0; - if (!StaticallyGreater.hasValue()) { + if (!StaticallyGreater) { Prolog->addSuccessor(Epilog); numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc()); } else if (*StaticallyGreater == false) { @@ -999,7 +1003,7 @@ MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr( } /// Update the machine instruction with new virtual registers. This -/// function may change the defintions and/or uses. +/// function may change the definitions and/or uses. void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI, bool LastDef, unsigned CurStageNum, @@ -1159,8 +1163,17 @@ void ModuloScheduleExpander::rewriteScheduledInstr( if (!InProlog && !Phi->isPHI() && StagePhi < StageSched) ReplaceReg = NewReg; if (ReplaceReg) { - MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); - UseOp.setReg(ReplaceReg); + const TargetRegisterClass *NRC = + MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg)); + if (NRC) + UseOp.setReg(ReplaceReg); + else { + Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY), + SplitReg) + .addReg(ReplaceReg); + UseOp.setReg(SplitReg); + } } } } @@ -1205,8 +1218,12 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, MI.eraseFromParent(); Changed = true; } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) { - MRI.constrainRegClass(MI.getOperand(1).getReg(), - MRI.getRegClass(MI.getOperand(0).getReg())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(MI.getOperand(1).getReg(), + MRI.getRegClass(MI.getOperand(0).getReg())); + assert(ConstrainRegClass && + "Expected a valid constrained register class!"); + (void)ConstrainRegClass; MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg()); if (LIS) @@ -1404,7 +1421,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { while (DefaultI != Defaults.rend()) LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg)); - if (IllegalPhiDefault.hasValue()) { + if (IllegalPhiDefault) { // The consumer optionally consumes LoopProducer in the same iteration // (because the producer is scheduled at an earlier cycle than the consumer) // or the initial value. 
To facilitate this we create an illegal block here @@ -1414,7 +1431,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { Register R = MRI.createVirtualRegister(RC); MachineInstr *IllegalPhi = BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R) - .addReg(IllegalPhiDefault.getValue()) + .addReg(*IllegalPhiDefault) .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect. .addReg(LoopReg) .addMBB(BB); // Block choice is arbitrary and has no effect. @@ -1430,7 +1447,7 @@ Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) { Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, const TargetRegisterClass *RC) { // If the init register is not undef, try and find an existing phi. - if (InitReg.hasValue()) { + if (InitReg) { auto I = Phis.find({LoopReg, InitReg.getValue()}); if (I != Phis.end()) return I->second; @@ -1446,7 +1463,7 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, auto I = UndefPhis.find(LoopReg); if (I != UndefPhis.end()) { Register R = I->second; - if (!InitReg.hasValue()) + if (!InitReg) // Found a phi taking undef as input, and this input is undef so return // without any more changes. return R; @@ -1454,7 +1471,10 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, MachineInstr *MI = MRI.getVRegDef(R); MI->getOperand(1).setReg(InitReg.getValue()); Phis.insert({{LoopReg, InitReg.getValue()}, R}); - MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue())); + assert(ConstrainRegClass && "Expected a valid constrained register class!"); + (void)ConstrainRegClass; UndefPhis.erase(I); return R; } @@ -1463,14 +1483,18 @@ Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg, if (!RC) RC = MRI.getRegClass(LoopReg); Register R = MRI.createVirtualRegister(RC); - if (InitReg.hasValue()) - MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + if (InitReg) { + const TargetRegisterClass *ConstrainRegClass = + MRI.constrainRegClass(R, MRI.getRegClass(*InitReg)); + assert(ConstrainRegClass && "Expected a valid constrained register class!"); + (void)ConstrainRegClass; + } BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R) - .addReg(InitReg.hasValue() ? *InitReg : undef(RC)) + .addReg(InitReg ? *InitReg : undef(RC)) .addMBB(PreheaderBB) .addReg(LoopReg) .addMBB(BB); - if (!InitReg.hasValue()) + if (!InitReg) UndefPhis[LoopReg] = R; else Phis[{LoopReg, *InitReg}] = R; @@ -1793,10 +1817,10 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // Iterate in reverse order over all instructions, remapping as we go. for (MachineBasicBlock *B : reverse(Blocks)) { - for (auto I = B->getFirstInstrTerminator()->getReverseIterator(); + for (auto I = B->instr_rbegin(); I != std::next(B->getFirstNonPHI()->getReverseIterator());) { - MachineInstr *MI = &*I++; - rewriteUsesOf(MI); + MachineBasicBlock::reverse_instr_iterator MI = I++; + rewriteUsesOf(&*MI); } } for (auto *MI : IllegalPhisToDelete) { @@ -1919,7 +1943,7 @@ void PeelingModuloScheduleExpander::fixupBranches() { TII->removeBranch(*Prolog); Optional<bool> StaticallyGreater = LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond); - if (!StaticallyGreater.hasValue()) { + if (!StaticallyGreater) { LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n"); // Dynamically branch based on Cond. 
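// The Optional<> edits in this file are a pure modernization: LLVM's
// Optional, like std::optional, is contextually convertible to bool and
// dereferenceable, so `O.hasValue()` becomes `if (O)` and `O.getValue()`
// becomes `*O`. A minimal sketch of the preferred spelling:
#include "llvm/ADT/Optional.h"

static int valueOrZero(llvm::Optional<int> O) {
  if (O)       // preferred over O.hasValue()
    return *O; // preferred over O.getValue()
  return 0;
}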
TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc()); @@ -1929,8 +1953,8 @@ void PeelingModuloScheduleExpander::fixupBranches() { // blocks. Leave it to unreachable-block-elim to clean up. Prolog->removeSuccessor(Fallthrough); for (MachineInstr &P : Fallthrough->phis()) { - P.RemoveOperand(2); - P.RemoveOperand(1); + P.removeOperand(2); + P.removeOperand(1); } TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc()); KernelDisposed = true; @@ -1939,8 +1963,8 @@ void PeelingModuloScheduleExpander::fixupBranches() { // Prolog always falls through; remove incoming values in epilog. Prolog->removeSuccessor(Epilog); for (MachineInstr &P : Epilog->phis()) { - P.RemoveOperand(4); - P.RemoveOperand(3); + P.removeOperand(4); + P.removeOperand(3); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp index db5217469fba..7304bfef55cb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -25,7 +25,7 @@ DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { Entry.Symbol = nullptr; CurrentEndOffset += S.size() + 1; } - return DwarfStringPoolEntryRef(*I.first, true); + return DwarfStringPoolEntryRef(*I.first); } StringRef NonRelocatableStringpool::internString(StringRef S) { @@ -44,7 +44,7 @@ NonRelocatableStringpool::getEntriesForEmission() const { Result.reserve(Strings.size()); for (const auto &E : Strings) if (E.getValue().isIndexed()) - Result.emplace_back(E, true); + Result.emplace_back(E); llvm::sort(Result, [](const DwarfStringPoolEntryRef A, const DwarfStringPoolEntryRef B) { return A.getIndex() < B.getIndex(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp index 8a6cf47c0d89..d5d262e4047a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp index 7693ab417de9..7709095cd683 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PHIElimination.cpp @@ -31,9 +31,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Pass.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp index 3e32afaafa6e..43b23368ead2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ParallelCG.cpp @@ -16,8 +16,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryBufferRef.h" #include 
"llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SplitModule.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp index ca44b7a53982..0f9da0637ced 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PatchableFunction.cpp @@ -14,11 +14,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp index f9b16d2630d6..31e37c4cd7e3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -90,7 +90,6 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdint> @@ -214,8 +213,9 @@ namespace { const SmallSet<Register, 2> &TargetReg, RecurrenceCycle &RC); - /// If copy instruction \p MI is a virtual register copy, track it in - /// the set \p CopyMIs. If this virtual register was previously seen as a + /// If copy instruction \p MI is a virtual register copy or a copy of a + /// constant physical register to a virtual register, track it in the + /// set \p CopyMIs. If this virtual register was previously seen as a /// copy, replace the uses of this copy with the previously seen copy's /// destination register. bool foldRedundantCopy(MachineInstr &MI, @@ -810,7 +810,7 @@ protected: unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. public: Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} - virtual ~Rewriter() {} + virtual ~Rewriter() = default; /// Get the next rewritable source (SrcReg, SrcSubReg) and /// the related value that it affects (DstReg, DstSubReg). @@ -1022,7 +1022,7 @@ public: CurrentSrcIdx = -1; // Rewrite the operation as a COPY. // Get rid of the sub-register index. - CopyLike.RemoveOperand(2); + CopyLike.removeOperand(2); // Morph the operation into a COPY. CopyLike.setDesc(TII.get(TargetOpcode::COPY)); return true; @@ -1412,7 +1412,7 @@ bool PeepholeOptimizer::foldRedundantCopy( Register SrcReg = MI.getOperand(1).getReg(); unsigned SrcSubReg = MI.getOperand(1).getSubReg(); - if (!SrcReg.isVirtual()) + if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg)) return false; Register DstReg = MI.getOperand(0).getReg(); @@ -1643,8 +1643,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // without any intervening re-definition of $physreg. DenseMap<Register, MachineInstr *> NAPhysToVirtMIs; - // Set of pairs of virtual registers and their subregs that are copied - // from. + // Set of copies to virtual registers keyed by source register. Never + // holds any physreg which requires def tracking. 
DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs; bool IsLoopHeader = MLI->isLoopHeader(&MBB); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp index 82ed386db827..97b1532300b1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -28,14 +28,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "post-RA-hazard-rec" @@ -72,10 +69,11 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { TII->CreateTargetPostRAHazardRecognizer(Fn)); // Return if the target has not implemented a hazard recognizer. - if (!HazardRec.get()) + if (!HazardRec) return false; // Loop over all of the basic blocks + bool Changed = false; for (auto &MBB : Fn) { // We do not call HazardRec->reset() here to make sure we are handling noop // hazards at the start of basic blocks. @@ -85,6 +83,8 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { HazardRec->EmitNoops(NumPreNoops); TII->insertNoops(MBB, MachineBasicBlock::iterator(MI), NumPreNoops); NumNoops += NumPreNoops; + if (NumPreNoops) + Changed = true; HazardRec->EmitInstruction(&MI); if (HazardRec->atIssueLimit()) { @@ -92,5 +92,5 @@ bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { } } } - return true; + return Changed; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp index aac46cb22084..98fc7e07a1b4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -25,18 +25,16 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDAGMutation.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -72,7 +70,7 @@ DebugMod("postra-sched-debugmod", cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden); -AntiDepBreaker::~AntiDepBreaker() { } +AntiDepBreaker::~AntiDepBreaker() = default; namespace { class PostRAScheduler : public MachineFunctionPass { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 74b903f99284..1115c2a27956 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -18,10 +18,8 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index d232ca3a69c3..7327f9e52efc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -11,10 +11,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -45,6 +46,11 @@ public: void getAnalysisUsage(AnalysisUsage &au) const override; bool runOnMachineFunction(MachineFunction &MF) override; + + virtual MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } }; } // end anonymous namespace @@ -124,7 +130,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // Using instr wasn't found, it could be in another block. // Leave the physreg IMPLICIT_DEF, but trim any extra operands. for (unsigned i = MI->getNumOperands() - 1; i; --i) - MI->RemoveOperand(i); + MI->removeOperand(i); LLVM_DEBUG(dbgs() << "Keeping physreg: " << *MI); } @@ -140,7 +146,6 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); assert(WorkList.empty() && "Inconsistent worklist state"); for (MachineBasicBlock &MBB : MF) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 8d8a6126dad0..1a0f296d5fdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -55,10 +55,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -130,6 +128,7 @@ private: void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); void insertPrologEpilogCode(MachineFunction &MF); + void insertZeroCallUsedRegs(MachineFunction &MF); }; } // end anonymous namespace @@ -284,6 +283,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { assert(!Failed && "Invalid warn-stack-size fn attr value"); (void)Failed; } + if (MF.getFunction().hasFnAttribute(Attribute::SafeStack)) { + StackSize += MFI.getUnsafeStackSize(); + } if (StackSize > Threshold) { DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning); F.getContext().diagnose(DiagStackSize); @@ 
-837,8 +839,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // Adjust 'Offset' to point to the end of last fixed sized preallocated // object. for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) continue; int64_t FixedOff; @@ -855,47 +857,34 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { if (FixedOff > Offset) Offset = FixedOff; } + Align MaxAlign = MFI.getMaxAlign(); // First assign frame offsets to stack objects that are used to spill // callee saved registers. - if (StackGrowsDown && MaxCSFrameIndex >= MinCSFrameIndex) { - for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. - continue; + if (MaxCSFrameIndex >= MinCSFrameIndex) { + for (unsigned i = 0; i <= MaxCSFrameIndex - MinCSFrameIndex; ++i) { + unsigned FrameIndex = + StackGrowsDown ? MinCSFrameIndex + i : MaxCSFrameIndex - i; - // If the stack grows down, we need to add the size to find the lowest - // address of the object. - Offset += MFI.getObjectSize(i); - - // Adjust to alignment boundary - Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); - - LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); - MFI.setObjectOffset(i, -Offset); // Set the computed offset - } - } else if (MaxCSFrameIndex >= MinCSFrameIndex) { - // Be careful about underflow in comparisons agains MinCSFrameIndex. - for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(FrameIndex) != TargetStackID::Default) continue; - if (MFI.isDeadObjectIndex(i)) + // TODO: should this just be if (MFI.isDeadObjectIndex(FrameIndex)) + if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex)) continue; - // Adjust to alignment boundary - Offset = alignTo(Offset, MFI.getObjectAlign(i), Skew); - - LLVM_DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); - MFI.setObjectOffset(i, Offset); - Offset += MFI.getObjectSize(i); + AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign, + Skew); } } + assert(MaxAlign == MFI.getMaxAlign() && + "MFI.getMaxAlign should already account for all callee-saved " + "registers without a fixed stack slot"); + // FixedCSEnd is the stack offset to the end of the fixed and callee-save // stack area. int64_t FixedCSEnd = Offset; - Align MaxAlign = MFI.getMaxAlign(); // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer @@ -982,8 +971,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i) continue; - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. + // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) continue; switch (MFI.getObjectSSPLayout(i)) { @@ -1036,8 +1025,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { continue; if (ProtectedObjs.count(i)) continue; - if (MFI.getStackID(i) != - TargetStackID::Default) // Only allocate objects on the default stack. 
+ // Only allocate objects on the default stack. + if (MFI.getStackID(i) != TargetStackID::Default) continue; // Add the objects that we need to allocate to our working set. @@ -1145,6 +1134,9 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(MF, *RestoreBlock); + // Zero call used registers before restoring callee-saved registers. + insertZeroCallUsedRegs(MF); + for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.inlineStackProbe(MF, *SaveBlock); @@ -1155,11 +1147,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { if (MF.shouldSplitStack()) { for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.adjustForSegmentedStacks(MF, *SaveBlock); - // Record that there are split-stack functions, so we will emit a - // special section to tell the linker. - MF.getMMI().setHasSplitStack(true); - } else - MF.getMMI().setHasNosplitStack(true); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -1171,6 +1159,120 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) { TFI.adjustForHiPEPrologue(MF, *SaveBlock); } +/// insertZeroCallUsedRegs - Zero out call used registers. +void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { + const Function &F = MF.getFunction(); + + if (!F.hasFnAttribute("zero-call-used-regs")) + return; + + using namespace ZeroCallUsedRegs; + + ZeroCallUsedRegsKind ZeroRegsKind = + StringSwitch<ZeroCallUsedRegsKind>( + F.getFnAttribute("zero-call-used-regs").getValueAsString()) + .Case("skip", ZeroCallUsedRegsKind::Skip) + .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg) + .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR) + .Case("used-arg", ZeroCallUsedRegsKind::UsedArg) + .Case("used", ZeroCallUsedRegsKind::Used) + .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg) + .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR) + .Case("all-arg", ZeroCallUsedRegsKind::AllArg) + .Case("all", ZeroCallUsedRegsKind::All); + + if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip) + return; + + const bool OnlyGPR = static_cast<unsigned>(ZeroRegsKind) & ONLY_GPR; + const bool OnlyUsed = static_cast<unsigned>(ZeroRegsKind) & ONLY_USED; + const bool OnlyArg = static_cast<unsigned>(ZeroRegsKind) & ONLY_ARG; + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const BitVector AllocatableSet(TRI.getAllocatableSet(MF)); + + // Mark all used registers. + BitVector UsedRegs(TRI.getNumRegs()); + if (OnlyUsed) + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + MCRegister Reg = MO.getReg(); + if (AllocatableSet[Reg] && !MO.isImplicit() && + (MO.isDef() || MO.isUse())) + UsedRegs.set(Reg); + } + + BitVector RegsToZero(TRI.getNumRegs()); + for (MCRegister Reg : AllocatableSet.set_bits()) { + // Skip over fixed registers. + if (TRI.isFixedRegister(MF, Reg)) + continue; + + // Want only general purpose registers. + if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg)) + continue; + + // Want only used registers. + if (OnlyUsed && !UsedRegs[Reg]) + continue; + + // Want only registers used for arguments. + if (OnlyArg && !TRI.isArgumentRegister(MF, Reg)) + continue; + + RegsToZero.set(Reg); + } + + // Don't clear registers that are live when leaving the function. 
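// The pass added above implements the "zero-call-used-regs" function
// attribute: on every return path, call-clobbered registers (filtered by
// the GPR / used / argument bits decoded from the attribute string) are
// zeroed so that less stale data is available to ROP-style gadgets. At the
// source level the attribute is requested roughly like this (a usage
// sketch; the function body is arbitrary):
__attribute__((zero_call_used_regs("used-gpr")))
int add(int a, int b) { return a + b; }
// Clang also exposes the same knob per translation unit through
// -fzero-call-used-regs=<kind>, with the kinds matching the StringSwitch
// cases above.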
+ for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB.terminators()) { + if (!MI.isReturn()) + continue; + + for (const auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(SReg); + } + } + + // Don't need to clear registers that are used/clobbered by terminating + // instructions. + for (const MachineBasicBlock &MBB : MF) { + if (!MBB.isReturnBlock()) + continue; + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + for (MachineBasicBlock::const_iterator I = MBBI, E = MBB.end(); I != E; + ++I) { + for (const MachineOperand &MO : I->operands()) { + if (!MO.isReg()) + continue; + + for (const MCPhysReg &Reg : + TRI.sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(Reg); + } + } + } + + // Don't clear registers that are reset before exiting. + for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) + for (MCRegister Reg : TRI.sub_and_superregs_inclusive(CSI.getReg())) + RegsToZero.reset(Reg); + + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + for (MachineBasicBlock &MBB : MF) + if (MBB.isReturnBlock()) + TFI.emitZeroCallUsedRegs(RegsToZero, MBB); +} + /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. void PEI::replaceFrameIndices(MachineFunction &MF) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp index 5f69f9194125..86ea3ec67178 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoProbeInserter.cpp @@ -18,11 +18,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCPseudoProbe.h" -#include "llvm/Target/TargetMachine.h" -#include <unordered_set> #define DEBUG_TYPE "pseudo-probe-inserter" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp index 74e721dbd138..40c52b9d9707 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -11,26 +11,23 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + using namespace llvm; static const char *const PSVNames[] = { "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; -PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII) +PseudoSourceValue::PseudoSourceValue(unsigned Kind, const TargetMachine &TM) : Kind(Kind) { - AddressSpace = TII.getAddressSpaceForPseudoSourceKind(Kind); + AddressSpace = TM.getAddressSpaceForPseudoSourceKind(Kind); } - -PseudoSourceValue::~PseudoSourceValue() {} +PseudoSourceValue::~PseudoSourceValue() = default; void PseudoSourceValue::printCustom(raw_ostream &O) const { if (Kind < TargetCustom) @@ 
-79,9 +76,9 @@ void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } -CallEntryPseudoSourceValue::CallEntryPseudoSourceValue( - unsigned Kind, const TargetInstrInfo &TII) - : PseudoSourceValue(Kind, TII) {} +CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(unsigned Kind, + const TargetMachine &TM) + : PseudoSourceValue(Kind, TM) {} bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { return false; @@ -96,20 +93,17 @@ bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const { } GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue( - const GlobalValue *GV, - const TargetInstrInfo &TII) - : CallEntryPseudoSourceValue(GlobalValueCallEntry, TII), GV(GV) {} + const GlobalValue *GV, const TargetMachine &TM) + : CallEntryPseudoSourceValue(GlobalValueCallEntry, TM), GV(GV) {} ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue( - const char *ES, const TargetInstrInfo &TII) - : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TII), ES(ES) {} + const char *ES, const TargetMachine &TM) + : CallEntryPseudoSourceValue(ExternalSymbolCallEntry, TM), ES(ES) {} -PseudoSourceValueManager::PseudoSourceValueManager( - const TargetInstrInfo &TIInfo) - : TII(TIInfo), - StackPSV(PseudoSourceValue::Stack, TII), - GOTPSV(PseudoSourceValue::GOT, TII), - JumpTablePSV(PseudoSourceValue::JumpTable, TII), - ConstantPoolPSV(PseudoSourceValue::ConstantPool, TII) {} +PseudoSourceValueManager::PseudoSourceValueManager(const TargetMachine &TMInfo) + : TM(TMInfo), StackPSV(PseudoSourceValue::Stack, TM), + GOTPSV(PseudoSourceValue::GOT, TM), + JumpTablePSV(PseudoSourceValue::JumpTable, TM), + ConstantPoolPSV(PseudoSourceValue::ConstantPool, TM) {} const PseudoSourceValue *PseudoSourceValueManager::getStack() { return &StackPSV; @@ -129,7 +123,7 @@ const PseudoSourceValue * PseudoSourceValueManager::getFixedStack(int FI) { std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; if (!V) - V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII); + V = std::make_unique<FixedStackPseudoSourceValue>(FI, TM); return V.get(); } @@ -138,7 +132,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { std::unique_ptr<const GlobalValuePseudoSourceValue> &E = GlobalCallEntries[GV]; if (!E) - E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII); + E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TM); return E.get(); } @@ -147,6 +141,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = ExternalCallEntries[ES]; if (!E) - E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII); + E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TM); return E.get(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp index 882f8e91bf1d..ec383b9b1c65 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFGraph.cpp @@ -8,6 +8,7 @@ // // Target-independent, SSA-based data flow graph for register data flow (RDF). 
// +#include "llvm/CodeGen/RDFGraph.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -18,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RDFGraph.h" #include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -27,8 +27,6 @@ #include "llvm/IR/Function.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -979,18 +977,6 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const { return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll()); } -RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { - if (AR.Reg == BR.Reg) { - LaneBitmask M = AR.Mask & BR.Mask; - return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef(); - } - // This isn't strictly correct, because the overlap may happen in the - // part masked out. - if (PRI.alias(AR, BR)) - return AR; - return RegisterRef(); -} - // For each stack in the map DefM, push the delimiter for block B on it. void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) { // Push block delimiters. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp index d704cf7b3213..2fd947086b4d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RDFLiveness.cpp @@ -22,6 +22,7 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. <hal-00647369> // +#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -32,14 +33,12 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/RDFLiveness.h" #include "llvm/CodeGen/RDFGraph.h" #include "llvm/CodeGen/RDFRegisters.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -341,9 +340,8 @@ Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA, if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef)) continue; NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG); - if (Visited.count(PA.Id)) + if (!Visited.insert(PA.Id).second) continue; - Visited.insert(PA.Id); // Go over all phi uses and get the reaching defs for each use. 
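// The Visited change above folds a count()-then-insert() pair into one
// insert(): set-style insert() returns {iterator, inserted}, and a false
// second member means the key was already present. Minimal sketch:
#include "llvm/ADT/DenseSet.h"

// Returns true exactly once per Id, with a single hash lookup.
static bool visitOnce(llvm::DenseSet<unsigned> &Visited, unsigned Id) {
  return Visited.insert(Id).second;
}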
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) { const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 1264e6021b6e..69db8bad54f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -34,12 +34,7 @@ static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg, const TargetRegisterInfo *TRI) { if (!isValidRegUse(MO)) return false; - if (MO.getReg() == PhysReg) - return true; - for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) - if (MO.getReg() == *R) - return true; - return false; + return TRI->regsOverlap(MO.getReg(), PhysReg); } static bool isValidRegDef(const MachineOperand &MO) { @@ -50,12 +45,7 @@ static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg, const TargetRegisterInfo *TRI) { if (!isValidRegDef(MO)) return false; - if (MO.getReg() == PhysReg) - return true; - for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R) - if (MO.getReg() == *R) - return true; - return false; + return TRI->regsOverlap(MO.getReg(), PhysReg); } void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp index d891d4c2ffbb..0c18814189eb 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.cpp @@ -85,7 +85,7 @@ void RegAllocBase::allocatePhysRegs() { seedLiveRegs(); // Continue assigning vregs one at a time to available physical registers. - while (LiveInterval *VirtReg = dequeue()) { + while (const LiveInterval *VirtReg = dequeue()) { assert(!VRM->hasPhys(VirtReg->reg()) && "Register already assigned"); // Unused registers can appear when the spiller coalesces snippets. @@ -140,10 +140,7 @@ void RegAllocBase::allocatePhysRegs() { // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg(), AllocOrder.front()); - continue; - } - - if (AvailablePhysReg) + } else if (AvailablePhysReg) Matrix->assign(*VirtReg, AvailablePhysReg); for (Register Reg : SplitVRegs) { @@ -176,7 +173,7 @@ void RegAllocBase::postOptimization() { DeadRemats.clear(); } -void RegAllocBase::enqueue(LiveInterval *LI) { +void RegAllocBase::enqueue(const LiveInterval *LI) { const Register Reg = LI->reg(); assert(Reg.isVirtual() && "Can only enqueue virtual registers"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h index 1fb56dbaebb7..a8bf305a50c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBase.h @@ -96,19 +96,19 @@ protected: virtual Spiller &spiller() = 0; /// enqueue - Add VirtReg to the priority queue of unassigned registers. - virtual void enqueueImpl(LiveInterval *LI) = 0; + virtual void enqueueImpl(const LiveInterval *LI) = 0; /// enqueue - Add VirtReg to the priority queue of unassigned registers. - void enqueue(LiveInterval *LI); + void enqueue(const LiveInterval *LI); /// dequeue - Return the next unassigned register, or NULL. - virtual LiveInterval *dequeue() = 0; + virtual const LiveInterval *dequeue() = 0; // A RegAlloc pass should override this to provide the allocation heuristics. 
// Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. - virtual MCRegister selectOrSplit(LiveInterval &VirtReg, + virtual MCRegister selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &splitLVRs) = 0; // Use this group name for NamedRegionTimer. @@ -116,7 +116,7 @@ protected: static const char TimerGroupDescription[]; /// Method called when the allocator is about to remove a LiveInterval. - virtual void aboutToRemoveInterval(LiveInterval &LI) {} + virtual void aboutToRemoveInterval(const LiveInterval &LI) {} public: /// VerifyEnabled - True when -verify-regalloc is given. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp index a9816b13e798..7defdf04aec8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -22,9 +22,7 @@ #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/Spiller.h" @@ -33,7 +31,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include <cstdlib> #include <queue> using namespace llvm; @@ -45,7 +42,7 @@ static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", namespace { struct CompSpillWeight { - bool operator()(LiveInterval *A, LiveInterval *B) const { + bool operator()(const LiveInterval *A, const LiveInterval *B) const { return A->weight() < B->weight(); } }; @@ -65,8 +62,9 @@ class RABasic : public MachineFunctionPass, // state std::unique_ptr<Spiller> SpillerInstance; - std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, - CompSpillWeight> Queue; + std::priority_queue<const LiveInterval *, std::vector<const LiveInterval *>, + CompSpillWeight> + Queue; // Scratch space. Allocated here to avoid repeated malloc calls in // selectOrSplit(). @@ -88,19 +86,17 @@ public: Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override { - Queue.push(LI); - } + void enqueueImpl(const LiveInterval *LI) override { Queue.push(LI); } - LiveInterval *dequeue() override { + const LiveInterval *dequeue() override { if (Queue.empty()) return nullptr; - LiveInterval *LI = Queue.top(); + const LiveInterval *LI = Queue.top(); Queue.pop(); return LI; } - MCRegister selectOrSplit(LiveInterval &VirtReg, + MCRegister selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &SplitVRegs) override; /// Perform register allocation. @@ -119,7 +115,7 @@ public: // Helper for spilling all live virtual registers currently unified under preg // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. 
- bool spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, + bool spillInterferences(const LiveInterval &VirtReg, MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs); static char ID; @@ -208,16 +204,17 @@ void RABasic::releaseMemory() { // Spill or split all live virtual registers currently unified under PhysReg // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. -bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, +bool RABasic::spillInterferences(const LiveInterval &VirtReg, + MCRegister PhysReg, SmallVectorImpl<Register> &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. - SmallVector<LiveInterval*, 8> Intfs; + SmallVector<const LiveInterval *, 8> Intfs; // Collect interferences assigned to any alias of the physical register. for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - for (auto *Intf : reverse(Q.interferingVRegs())) { + for (const auto *Intf : reverse(Q.interferingVRegs())) { if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); @@ -229,7 +226,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // Spill each interfering vreg allocated to PhysReg or an alias. for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { - LiveInterval &Spill = *Intfs[i]; + const LiveInterval &Spill = *Intfs[i]; // Skip duplicates. if (!VRM->hasPhys(Spill.reg())) @@ -258,7 +255,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, +MCRegister RABasic::selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &SplitVRegs) { // Populate a list of physical register spill candidates. 
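// RABasic's allocation queue, shown in the hunk above, is an ordinary
// std::priority_queue ordered by spill weight, so the most expensive
// interval to spill is allocated first. A self-contained model of that
// ordering (Interval is a stand-in type):
#include <queue>
#include <vector>

struct Interval { float Weight; unsigned VReg; };

struct ByWeight {
  bool operator()(const Interval *A, const Interval *B) const {
    return A->Weight < B->Weight; // max-heap: heaviest interval on top
  }
};

using AllocQueue =
    std::priority_queue<const Interval *, std::vector<const Interval *>,
                        ByWeight>;
// pop() therefore yields intervals from heaviest to lightest, mirroring
// CompSpillWeight above; the const element type matches the new
// const-correct enqueueImpl()/dequeue() signatures.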
SmallVector<MCRegister, 8> PhysRegSpillCands; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp index fc5d1104a999..ee03feda796f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp @@ -11,13 +11,14 @@ //===----------------------------------------------------------------------===// #include "RegAllocEvictionAdvisor.h" +#include "AllocationOrder.h" #include "RegAllocGreedy.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" @@ -25,7 +26,7 @@ using namespace llvm; static cl::opt<RegAllocEvictionAdvisorAnalysis::AdvisorMode> Mode( - "regalloc-enable-advisor", cl::Hidden, cl::ZeroOrMore, + "regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values( @@ -42,6 +43,14 @@ static cl::opt<bool> EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +cl::opt<unsigned> EvictInterferenceCutoff( + "regalloc-eviction-max-interference-cutoff", cl::Hidden, + cl::desc("Number of interferences after which we declare " + "an interference unevictable and bail out. This " + "is a compilation cost-saving consideration. To " + "disable, pass a very large number."), + cl::init(10)); + #define DEBUG_TYPE "regalloc" #ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL #define LLVM_HAVE_TF_AOT @@ -66,7 +75,7 @@ public: private: std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(MachineFunction &MF, const RAGreedy &RA) override { + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override { return std::make_unique<DefaultEvictionAdvisor>(MF, RA); } bool doInitialization(Module &M) override { @@ -113,7 +122,7 @@ StringRef RegAllocEvictionAdvisorAnalysis::getPassName() const { llvm_unreachable("Unknown advisor kind"); } -RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF, +RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) : MF(MF), RA(RA), Matrix(RA.getInterferenceMatrix()), LIS(RA.getLiveIntervals()), VRM(RA.getVirtRegMap()), @@ -136,8 +145,8 @@ RegAllocEvictionAdvisor::RegAllocEvictionAdvisor(MachineFunction &MF, /// register. /// @param B The live range to be evicted. /// @param BreaksHint True when B is already assigned to its preferred register. -bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, - LiveInterval &B, +bool DefaultEvictionAdvisor::shouldEvict(const LiveInterval &A, bool IsHint, + const LiveInterval &B, bool BreaksHint) const { bool CanSplit = RA.getExtraInfo().getStage(B) < RS_Spill; @@ -156,7 +165,7 @@ bool DefaultEvictionAdvisor::shouldEvict(LiveInterval &A, bool IsHint, /// canEvictHintInterference - return true if the interference for VirtReg /// on the PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg. 
bool DefaultEvictionAdvisor::canEvictHintInterference( - LiveInterval &VirtReg, MCRegister PhysReg, + const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const { EvictionCost MaxCost; MaxCost.setBrokenHints(1); @@ -174,7 +183,7 @@ bool DefaultEvictionAdvisor::canEvictHintInterference( /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( - LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, + const LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const { // It is only possible to evict virtual register interference. if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) @@ -195,12 +204,12 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. - const auto &Interferences = Q.interferingVRegs(10); - if (Interferences.size() >= 10) + const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff); + if (Interferences.size() >= EvictInterferenceCutoff) return false; // Check if any interfering live range is heavier than MaxWeight. - for (LiveInterval *Intf : reverse(Interferences)) { + for (const LiveInterval *Intf : reverse(Interferences)) { assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); @@ -227,7 +236,10 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( MRI->getRegClass(Intf->reg()))); // Only evict older cascades or live ranges without a cascade. unsigned IntfCascade = RA.getExtraInfo().getCascade(Intf->reg()); - if (Cascade <= IntfCascade) { + if (Cascade == IntfCascade) + return false; + + if (Cascade < IntfCascade) { if (!Urgent) return false; // We permit breaking cascades for urgent evictions. It should be the @@ -261,7 +273,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost( } MCRegister DefaultEvictionAdvisor::tryFindEvictionCandidate( - LiveInterval &VirtReg, const AllocationOrder &Order, + const LiveInterval &VirtReg, const AllocationOrder &Order, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const { // Keep track of the cheapest interference seen so far. 
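// The cascade comparison above was tightened: an interfering range in the
// same cascade is now never evictable, and one in a newer (larger-numbered)
// cascade may only be evicted by an urgent allocation; strictly older
// cascades remain evictable. A standalone sketch of the predicate (the
// helper name is ours, and this ignores the weight checks that follow it):
static bool cascadeAllowsEviction(unsigned Cascade, unsigned IntfCascade,
                                  bool Urgent) {
  if (Cascade == IntfCascade)
    return false;  // same cascade: never evict
  if (Cascade < IntfCascade)
    return Urgent; // interference from a newer cascade: urgent only
  return true;     // interference from an older cascade: evictable
}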
EvictionCost BestCost; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h index 1f40386db8da..d57b0ca6d53d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h @@ -9,19 +9,25 @@ #ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H #define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H -#include "AllocationOrder.h" -#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/Register.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Pass.h" namespace llvm { +class AllocationOrder; +class LiveInterval; +class LiveIntervals; +class LiveRegMatrix; +class MachineFunction; +class MachineRegisterInfo; +class RegisterClassInfo; +class TargetRegisterInfo; +class VirtRegMap; using SmallVirtRegSet = SmallSet<Register, 16>; @@ -99,15 +105,14 @@ public: /// Find a physical register that can be freed by evicting the FixedRegisters, /// or return NoRegister. The eviction decision is assumed to be correct (i.e. /// no fixed live ranges are evicted) and profitable. - virtual MCRegister - tryFindEvictionCandidate(LiveInterval &VirtReg, const AllocationOrder &Order, - uint8_t CostPerUseLimit, - const SmallVirtRegSet &FixedRegisters) const = 0; + virtual MCRegister tryFindEvictionCandidate( + const LiveInterval &VirtReg, const AllocationOrder &Order, + uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const = 0; /// Find out if we can evict the live ranges occupying the given PhysReg, /// which is a hint (preferred register) for VirtReg. virtual bool - canEvictHintInterference(LiveInterval &VirtReg, MCRegister PhysReg, + canEvictHintInterference(const LiveInterval &VirtReg, MCRegister PhysReg, const SmallVirtRegSet &FixedRegisters) const = 0; /// Returns true if the given \p PhysReg is a callee saved register and has @@ -115,9 +120,9 @@ public: bool isUnusedCalleeSavedReg(MCRegister PhysReg) const; protected: - RegAllocEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA); + RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA); - Register canReassign(LiveInterval &VirtReg, Register PrevReg) const; + Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const; // Get the upper limit of elements in the given Order we need to analize. // TODO: is this heuristic, we could consider learning it. @@ -173,7 +178,7 @@ public: /// Get an advisor for the given context (i.e. 
machine function, etc) virtual std::unique_ptr<RegAllocEvictionAdvisor> - getAdvisor(MachineFunction &MF, const RAGreedy &RA) = 0; + getAdvisor(const MachineFunction &MF, const RAGreedy &RA) = 0; AdvisorMode getAdvisorMode() const { return Mode; } protected: @@ -200,19 +205,20 @@ RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor(); // out of RegAllocGreedy.cpp class DefaultEvictionAdvisor : public RegAllocEvictionAdvisor { public: - DefaultEvictionAdvisor(MachineFunction &MF, const RAGreedy &RA) + DefaultEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA) : RegAllocEvictionAdvisor(MF, RA) {} private: - MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &, - uint8_t, + MCRegister tryFindEvictionCandidate(const LiveInterval &, + const AllocationOrder &, uint8_t, const SmallVirtRegSet &) const override; - bool canEvictHintInterference(LiveInterval &, MCRegister, + bool canEvictHintInterference(const LiveInterval &, MCRegister, const SmallVirtRegSet &) const override; - bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool, + bool canEvictInterferenceBasedOnCost(const LiveInterval &, MCRegister, bool, EvictionCost &, const SmallVirtRegSet &) const; - bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const; + bool shouldEvict(const LiveInterval &A, bool, const LiveInterval &B, + bool) const; }; } // namespace llvm diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp index 6653145d3d2a..72ceaa768803 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocFast.cpp @@ -35,14 +35,9 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" -#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -364,7 +359,16 @@ bool RegAllocFast::mayLiveOut(Register VirtReg) { // If this block loops back to itself, it is necessary to check whether the // use comes after the def. if (MBB->isSuccessor(MBB)) { - SelfLoopDef = MRI->getUniqueVRegDef(VirtReg); + // Find the first def in the self loop MBB. + for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) { + if (DefInst.getParent() != MBB) { + MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); + return true; + } else { + if (!SelfLoopDef || dominates(*MBB, DefInst.getIterator(), SelfLoopDef)) + SelfLoopDef = &DefInst; + } + } if (!SelfLoopDef) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); return true; @@ -1117,6 +1121,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { RegMasks.clear(); BundleVirtRegsMap.clear(); + auto TiedOpIsUndef = [&](const MachineOperand &MO, unsigned Idx) { + assert(MO.isTied()); + unsigned TiedIdx = MI.findTiedOperandIdx(Idx); + const MachineOperand &TiedMO = MI.getOperand(TiedIdx); + return TiedMO.isUndef(); + }; // Scan for special cases; Apply pre-assigned register defs to state. 
bool HasPhysRegUse = false; bool HasRegMask = false; @@ -1124,7 +1134,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { bool HasDef = false; bool HasEarlyClobber = false; bool NeedToAssignLiveThroughs = false; - for (MachineOperand &MO : MI.operands()) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { + MachineOperand &MO = MI.getOperand(I); if (MO.isReg()) { Register Reg = MO.getReg(); if (Reg.isVirtual()) { @@ -1135,7 +1146,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { HasEarlyClobber = true; NeedToAssignLiveThroughs = true; } - if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || + (MO.getSubReg() != 0 && !MO.isUndef())) NeedToAssignLiveThroughs = true; } } else if (Reg.isPhysical()) { @@ -1235,7 +1247,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(OpIdx); LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); unsigned Reg = MO.getReg(); - if (MO.isEarlyClobber() || MO.isTied() || + if (MO.isEarlyClobber() || + (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) || (MO.getSubReg() && !MO.isUndef())) { defineLiveThroughVirtReg(MI, OpIdx, Reg); } else { @@ -1258,7 +1271,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // Free registers occupied by defs. // Iterate operands in reverse order, so we see the implicit super register // defs first (we added them earlier in case of <def,read-undef>). - for (MachineOperand &MO : llvm::reverse(MI.operands())) { + for (signed I = MI.getNumOperands() - 1; I >= 0; --I) { + MachineOperand &MO = MI.getOperand(I); if (!MO.isReg() || !MO.isDef()) continue; @@ -1273,7 +1287,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { "tied def assigned to clobbered register"); // Do not free tied operands and early clobbers. 
- if (MO.isTied() || MO.isEarlyClobber()) + if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber()) continue; Register Reg = MO.getReg(); if (!Reg) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp index 7870574df5b2..2efb98ae200d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -21,9 +21,7 @@ #include "SplitKit.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -62,6 +60,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" @@ -71,13 +70,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> #include <cassert> #include <cstdint> -#include <memory> -#include <queue> -#include <tuple> #include <utility> using namespace llvm; @@ -127,11 +122,18 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden); -static cl::opt<bool> ConsiderLocalIntervalCost( - "consider-local-interval-cost", cl::Hidden, - cl::desc("Consider the cost of local intervals created by a split " - "candidate when choosing the best split candidate."), - cl::init(false)); +static cl::opt<unsigned long> GrowRegionComplexityBudget( + "grow-region-complexity-budget", + cl::desc("growRegion() does not scale with the number of BB edges, so " + "limit its budget and bail out once we reach the limit."), + cl::init(10000), cl::Hidden); + +static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness( + "greedy-regclass-priority-trumps-globalness", + cl::desc("Change the greedy register allocator's live range priority " + "calculation to make the AllocationPriority of the register class " + "more important than whether the range is global"), + cl::Hidden); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -277,9 +279,9 @@ void RAGreedy::releaseMemory() { GlobalCand.clear(); } -void RAGreedy::enqueueImpl(LiveInterval *LI) { enqueue(Queue, LI); } +void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); } -void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { +void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) { // Prioritize live ranges by size, assigning larger ranges first. // The queue holds (size, reg) pairs. const unsigned Size = LI->getSize(); @@ -308,8 +310,10 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // prevents excessive spilling in pathological cases.
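The grow-region-complexity-budget option added above trades precision for a hard cap on work: growRegion() simply gives up once it has looked at that many blocks. A hedged sketch of the bailout pattern (standalone C++ with illustrative names; the real code walks edge bundles via SpillPlacer and Bundles):

#include <cstdio>
#include <vector>

// Returns false ("no region") once the accumulated block count exceeds the
// budget, so compile time no longer scales with the number of CFG edges.
bool growRegionBudgeted(const std::vector<std::vector<unsigned>> &BundleBlocks,
                        unsigned long Budget) {
  for (const std::vector<unsigned> &Blocks : BundleBlocks) {
    if (Blocks.size() >= Budget)
      return false; // bail out: the caller treats this as a failed candidate
    Budget -= Blocks.size();
    // ... visit Blocks, possibly queueing further bundles ...
  }
  return true;
}

int main() {
  std::vector<std::vector<unsigned>> Work{{1, 2, 3}, {4, 5}, {6}};
  std::printf("%d\n", growRegionBudgeted(Work, 10)); // 1: fits in budget
  std::printf("%d\n", growRegionBudgeted(Work, 4));  // 0: budget exhausted
}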
bool ReverseLocal = TRI->reverseLocalAssignment(); const TargetRegisterClass &RC = *MRI->getRegClass(Reg); - bool ForceGlobal = !ReverseLocal && - (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC)); + bool ForceGlobal = + !ReverseLocal && (Size / SlotIndex::InstrDist) > + (2 * RegClassInfo.getNumAllocatableRegs(&RC)); + unsigned GlobalBit = 0; if (Stage == RS_Assign && !ForceGlobal && !LI->empty() && LIS->intervalIsInOneMBB(*LI)) { @@ -324,15 +328,18 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // large blocks on targets with many physical registers. Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); } - Prio |= RC.AllocationPriority << 24; } else { // Allocate global and split ranges in long->short order. Long ranges that // don't fit should be spilled (or split) ASAP so they don't create // interference. Mark a bit to prioritize global above local ranges. - Prio = (1u << 29) + Size; - - Prio |= RC.AllocationPriority << 24; + Prio = Size; + GlobalBit = 1; } + if (RegClassPriorityTrumpsGlobalness) + Prio |= RC.AllocationPriority << 25 | GlobalBit << 24; + else + Prio |= GlobalBit << 29 | RC.AllocationPriority << 24; + // Mark a higher bit to prioritize global and local above RS_Split. Prio |= (1u << 31); @@ -345,9 +352,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { CurQueue.push(std::make_pair(Prio, ~Reg)); } -LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } +const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } -LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { +const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { if (CurQueue.empty()) return nullptr; LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); @@ -360,10 +367,10 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { //===----------------------------------------------------------------------===// /// tryAssign - Try to assign VirtReg to an available register. -MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl<Register> &NewVRegs, - const SmallVirtRegSet &FixedRegisters) { +MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<Register> &NewVRegs, + const SmallVirtRegSet &FixedRegisters) { MCRegister PhysReg; for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) { assert(*I); @@ -413,7 +420,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// -Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, +Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg, Register PrevReg) const { auto Order = AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); @@ -440,94 +447,11 @@ Register RegAllocEvictionAdvisor::canReassign(LiveInterval &VirtReg, return PhysReg; } -/// Return true if all interferences between VirtReg and PhysReg between -/// Start and End can be evicted. -/// -/// \param VirtReg Live range that is about to be assigned. -/// \param PhysReg Desired register for assignment. -/// \param Start Start of range to look for interferences. -/// \param End End of range to look for interferences. -/// \param MaxCost Only look for cheaper candidates and update with new cost -/// when returning true. -/// \return True when interference can be evicted cheaper than MaxCost. 
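The enqueue() hunk above repacks the 32-bit priority word so that either the global bit or the register class's AllocationPriority occupies the more significant position, depending on the new flag. A small self-contained illustration (bit positions taken from the hunk and a 5-bit AllocationPriority assumed; the sizes and values are made up):

#include <cstdint>
#include <cstdio>

uint32_t makePrio(uint32_t Size, uint32_t RCPrio, uint32_t GlobalBit,
                  bool RCPrioTrumpsGlobalness) {
  uint32_t Prio = Size;
  if (RCPrioTrumpsGlobalness)
    Prio |= RCPrio << 25 | GlobalBit << 24; // class priority dominates
  else
    Prio |= GlobalBit << 29 | RCPrio << 24; // globalness dominates
  return Prio | (1u << 31); // keep allocation above RS_Split ranges
}

int main() {
  // Local range of a high-priority class vs. global range of a low one.
  uint32_t A = makePrio(100, /*RCPrio=*/3, /*GlobalBit=*/0, true);
  uint32_t B = makePrio(100, /*RCPrio=*/1, /*GlobalBit=*/1, true);
  std::printf("class priority wins: %s\n", A > B ? "yes" : "no");
}

With the flag clear the same comparison flips, because the global bit then sits above every class-priority bit.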
-bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, - EvictionCost &MaxCost) const { - EvictionCost Cost; - - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - - // Check if any interfering live range is heavier than MaxWeight. - for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { - // Check if interference overlast the segment in interest. - if (!Intf->overlaps(Start, End)) - continue; - - // Cannot evict non virtual reg interference. - if (!Register::isVirtualRegister(Intf->reg())) - return false; - // Never evict spill products. They cannot split or spill. - if (ExtraInfo->getStage(*Intf) == RS_Done) - return false; - - // Would this break a satisfied hint? - bool BreaksHint = VRM->hasPreferredPhys(Intf->reg()); - // Update eviction cost. - Cost.BrokenHints += BreaksHint; - Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight()); - // Abort if this would be too expensive. - if (!(Cost < MaxCost)) - return false; - } - } - - if (Cost.MaxWeight == 0) - return false; - - MaxCost = Cost; - return true; -} - -/// Return the physical register that will be best -/// candidate for eviction by a local split interval that will be created -/// between Start and End. -/// -/// \param Order The allocation order -/// \param VirtReg Live range that is about to be assigned. -/// \param Start Start of range to look for interferences -/// \param End End of range to look for interferences -/// \param BestEvictweight The eviction cost of that eviction -/// \return The PhysReg which is the best candidate for eviction and the -/// eviction cost in BestEvictweight -MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictweight) const { - EvictionCost BestEvictCost; - BestEvictCost.setMax(); - BestEvictCost.MaxWeight = VirtReg.weight(); - MCRegister BestEvicteePhys; - - // Go over all physical registers and find the best candidate for eviction - for (MCRegister PhysReg : Order.getOrder()) { - - if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, - BestEvictCost)) - continue; - - // Best so far. - BestEvicteePhys = PhysReg; - } - *BestEvictweight = BestEvictCost.MaxWeight; - return BestEvicteePhys; -} - /// evictInterference - Evict any interfering registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. -void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, +void RAGreedy::evictInterference(const LiveInterval &VirtReg, + MCRegister PhysReg, SmallVectorImpl<Register> &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges can then only be @@ -538,25 +462,23 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, << " interference: Cascade " << Cascade << '\n'); // Collect all interfering virtregs first.
- SmallVector<LiveInterval*, 8> Intfs; + SmallVector<const LiveInterval *, 8> Intfs; for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // We usually have the interfering VRegs cached so collectInterferingVRegs() // should be fast; we may need to recalculate when different physregs // overlap the same register unit, as we would then have different SubRanges // queried against it. - ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); + ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs(); Intfs.append(IVR.begin(), IVR.end()); } // Evict them second. This will invalidate the queries. - for (LiveInterval *Intf : Intfs) { + for (const LiveInterval *Intf : Intfs) { // The same VirtReg may be present in multiple RegUnits. Skip duplicates. if (!VRM->hasPhys(Intf->reg())) continue; - LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg()); - Matrix->unassign(*Intf); assert((ExtraInfo->getCascade(Intf->reg()) < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && @@ -624,7 +546,8 @@ bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit, /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. -MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, +MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) { @@ -782,12 +705,17 @@ bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { unsigned Visited = 0; #endif + unsigned long Budget = GrowRegionComplexityBudget; while (true) { ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. for (unsigned Bundle : NewBundles) { // Look at all blocks connected to Bundle in the full graph. ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + // Limit compilation time by bailing out after we use all our budget. + if (Blocks.size() >= Budget) + return false; + Budget -= Blocks.size(); for (unsigned Block : Blocks) { if (!Todo.test(Block)) continue; @@ -887,147 +815,14 @@ BlockFrequency RAGreedy::calcSpillCost() { return Cost; } -/// Check if splitting Evictee will create a local split interval in -/// basic block number BBNumber that may cause a bad eviction chain. This is -/// intended to prevent bad eviction sequences like: -/// movl %ebp, 8(%esp) # 4-byte Spill -/// movl %ecx, %ebp -/// movl %ebx, %ecx -/// movl %edi, %ebx -/// movl %edx, %edi -/// cltd -/// idivl %esi -/// movl %edi, %edx -/// movl %ebx, %edi -/// movl %ecx, %ebx -/// movl %ebp, %ecx -/// movl 16(%esp), %ebp # 4 - byte Reload -/// -/// Such sequences are created in 2 scenarios: -/// -/// Scenario #1: -/// %0 is evicted from physreg0 by %1. -/// Evictee %0 is intended for region splitting with split candidate -/// physreg0 (the reg %0 was evicted from). -/// Region splitting creates a local interval because of interference with the -/// evictor %1 (normally region splitting creates 2 interval, the "by reg" -/// and "by stack" intervals and local interval created when interference -/// occurs). -/// One of the split intervals ends up evicting %2 from physreg1. -/// Evictee %2 is intended for region splitting with split candidate -/// physreg1. -/// One of the split intervals ends up evicting %3 from physreg2, etc. -/// -/// Scenario #2 -/// %0 is evicted from physreg0 by %1.
-/// %2 is evicted from physreg2 by %3 etc. -/// Evictee %0 is intended for region splitting with split candidate -/// physreg1. -/// Region splitting creates a local interval because of interference with the -/// evictor %1. -/// One of the split intervals ends up evicting back original evictor %1 -/// from physreg0 (the reg %0 was evicted from). -/// Another evictee %2 is intended for region splitting with split candidate -/// physreg1. -/// One of the split intervals ends up evicting %3 from physreg2, etc. -/// -/// \param Evictee The register considered to be split. -/// \param Cand The split candidate that determines the physical register -/// we are splitting for and the interferences. -/// \param BBNumber The number of a BB for which the region split process will -/// create a local split interval. -/// \param Order The physical registers that may get evicted by a split -/// artifact of Evictee. -/// \return True if splitting Evictee may cause a bad eviction chain, false -/// otherwise. -bool RAGreedy::splitCanCauseEvictionChain(Register Evictee, - GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order) { - EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); - unsigned Evictor = VregEvictorInfo.first; - MCRegister PhysReg = VregEvictorInfo.second; - - // No actual evictor. - if (!Evictor || !PhysReg) - return false; - - float MaxWeight = 0; - MCRegister FutureEvictedPhysReg = - getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), - Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); - - // The bad eviction chain occurs when either the split candidate is the - // evicting reg or one of the split artifact will evict the evicting reg. - if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) - return false; - - Cand.Intf.moveToBlock(BBNumber); - - // Check to see if the Evictor contains interference (with Evictee) in the - // given BB. If so, this interference caused the eviction of Evictee from - // PhysReg. This suggest that we will create a local interval during the - // region split to avoid this interference This local interval may cause a bad - // eviction chain. - if (!LIS->hasInterval(Evictor)) - return false; - LiveInterval &EvictorLI = LIS->getInterval(Evictor); - if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end()) - return false; - - // Now, check to see if the local interval we will create is going to be - // expensive enough to evict somebody If so, this may cause a bad eviction - // chain. - float splitArtifactWeight = - VRAI->futureWeight(LIS->getInterval(Evictee), - Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); - if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) - return false; - - return true; -} - -/// Check if splitting VirtRegToSplit will create a local split interval -/// in basic block number BBNumber that may cause a spill. -/// -/// \param VirtRegToSplit The register considered to be split. -/// \param Cand The split candidate that determines the physical -/// register we are splitting for and the interferences. -/// \param BBNumber The number of a BB for which the region split process -/// will create a local split interval. -/// \param Order The physical registers that may get evicted by a -/// split artifact of VirtRegToSplit. -/// \return True if splitting VirtRegToSplit may cause a spill, false -/// otherwise. 
-bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order) { - Cand.Intf.moveToBlock(BBNumber); - - // Check if the local interval will find a non interfereing assignment. - for (auto PhysReg : Order.getOrder()) { - if (!Matrix->checkInterference(Cand.Intf.first().getPrevIndex(), - Cand.Intf.last(), PhysReg)) - return false; - } - - // The local interval is not able to find non interferencing assignment - // and not able to evict a less worthy interval, therfore, it can cause a - // spill. - return true; -} - /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, - const AllocationOrder &Order, - bool *CanCauseEvictionChain) { + const AllocationOrder &Order) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; - Register VirtRegToSplit = SA->getParent().reg(); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned I = 0; I != UseBlocks.size(); ++I) { const SplitAnalysis::BlockInfo &BI = UseBlocks[I]; @@ -1037,29 +832,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, unsigned Ins = 0; Cand.Intf.moveToBlock(BC.Number); - // Check wheather a local interval is going to be created during the region - // split. Calculate adavanced spilt cost (cost of local intervals) if option - // is enabled. - if (EnableAdvancedRASplitCost && Cand.Intf.hasInterference() && BI.LiveIn && - BI.LiveOut && RegIn && RegOut) { - - if (CanCauseEvictionChain && - splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { - // This interference causes our eviction from this assignment, we might - // evict somebody else and eventually someone will spill, add that cost. - // See splitCanCauseEvictionChain for detailed description of scenarios. - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - - *CanCauseEvictionChain = true; - - } else if (splitCanCauseLocalSpill(VirtRegToSplit, Cand, BC.Number, - Order)) { - // This interference causes local interval to spill, add that cost. - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); - } - } if (BI.LiveIn) Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); @@ -1080,20 +852,6 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, if (Cand.Intf.hasInterference()) { GlobalCost += SpillPlacer->getBlockFrequency(Number); GlobalCost += SpillPlacer->getBlockFrequency(Number); - - // Check wheather a local interval is going to be created during the - // region split. - if (EnableAdvancedRASplitCost && CanCauseEvictionChain && - splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { - // This interference cause our eviction from this assignment, we might - // evict somebody else, add that cost. - // See splitCanCauseEvictionChain for detailed description of - // scenarios. 
- GlobalCost += SpillPlacer->getBlockFrequency(Number); - GlobalCost += SpillPlacer->getBlockFrequency(Number); - - *CanCauseEvictionChain = true; - } } continue; } @@ -1253,7 +1011,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, MF->verify(this, "After splitting live range around region"); } -MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, +MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs) { if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg)) @@ -1276,19 +1034,8 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } - bool CanCauseEvictionChain = false; - unsigned BestCand = - calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, - false /*IgnoreCSR*/, &CanCauseEvictionChain); - - // Split candidates with compact regions can cause a bad eviction sequence. - // See splitCanCauseEvictionChain for detailed description of scenarios. - // To avoid it, we need to comapre the cost with the spill cost and not the - // current max frequency. - if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && - CanCauseEvictionChain) { - return MCRegister::NoRegister; - } + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, false /*IgnoreCSR*/); // No solutions found, fall back to single block splitting. if (!HasCompact && BestCand == NoCand) @@ -1297,11 +1044,11 @@ MCRegister RAGreedy::tryRegionSplit(LiveInterval &VirtReg, return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); } -unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, +unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain) { + unsigned &NumCands, + bool IgnoreCSR) { unsigned BestCand = NoCand; for (MCPhysReg PhysReg : Order) { assert(PhysReg); @@ -1364,8 +1111,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, continue; } - bool HasEvictionChain = false; - Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); + Cost += calcGlobalSplitCost(Cand, Order); LLVM_DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1376,28 +1122,14 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (Cost < BestCost) { BestCand = NumCands; BestCost = Cost; - // See splitCanCauseEvictionChain for detailed description of bad - // eviction chain scenarios. - if (CanCauseEvictionChain) - *CanCauseEvictionChain = HasEvictionChain; } ++NumCands; } - if (CanCauseEvictionChain && BestCand != NoCand) { - // See splitCanCauseEvictionChain for detailed description of bad - // eviction chain scenarios. - LLVM_DEBUG(dbgs() << "Best split candidate of vreg " - << printReg(VirtReg.reg(), TRI) << " may "); - if (!(*CanCauseEvictionChain)) - LLVM_DEBUG(dbgs() << "not "); - LLVM_DEBUG(dbgs() << "cause bad eviction chain\n"); - } - return BestCand; } -unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, +unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, SmallVectorImpl<Register> &NewVRegs) { SmallVector<unsigned, 8> UsedCands; @@ -1444,7 +1176,8 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, /// tryBlockSplit - Split a global live range around every block with uses. 
This /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. -unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); Register Reg = VirtReg.reg(); @@ -1507,9 +1240,9 @@ static unsigned getNumAllocatableRegsForConstraints( /// be moved to a larger register class. /// /// This is similar to spilling to a larger register class. -unsigned -RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl<Register> &NewVRegs) { +unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<Register> &NewVRegs) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(CurRC)) @@ -1529,7 +1262,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC, *MF); - unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + unsigned SuperRCNumAllocatableRegs = + RegClassInfo.getNumAllocatableRegs(SuperRC); // Split around every non-copy instruction if this split will relax // the constraints on the virtual register. // Otherwise, splitting just inserts uncoalescable copies that do not help @@ -1539,7 +1273,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (MI->isFullCopy() || SuperRCNumAllocatableRegs == getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC, - TII, TRI, RCI)) { + TII, TRI, RegClassInfo)) { LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI); continue; } @@ -1649,7 +1383,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg, /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only /// basic block. /// -unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg, + AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs) { // TODO: the function currently only handles a single UseBlock; it should be // possible to generalize. @@ -1879,7 +1614,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// trySplit - Try to split VirtReg or one of its interferences, making it /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. -unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, +unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, const SmallVirtRegSet &FixedRegisters) { // Ranges must be Split2 or less. @@ -1928,6 +1663,18 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { return false; } +/// Return true if the existing assignment of \p Intf overlaps, but is not the +/// same, as \p PhysReg. 
+static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, + const VirtRegMap &VRM, + MCRegister PhysReg, + const LiveInterval &Intf) { + MCRegister AssignedReg = VRM.getPhys(Intf.reg()); + if (PhysReg == AssignedReg) + return false; + return TRI.regsOverlap(PhysReg, AssignedReg); +} + /// mayRecolorAllInterferences - Check if the virtual registers that /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be /// recolored to free \p PhysReg. @@ -1937,8 +1684,8 @@ static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) { /// \p FixedRegisters contains all the virtual registers that cannot be /// recolored. bool RAGreedy::mayRecolorAllInterferences( - MCRegister PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, - const SmallVirtRegSet &FixedRegisters) { + MCRegister PhysReg, const LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) { const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg()); for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -1952,13 +1699,21 @@ bool RAGreedy::mayRecolorAllInterferences( CutOffInfo |= CO_Interf; return false; } - for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { - // If Intf is done and sit on the same register class as VirtReg, - // it would not be recolorable as it is in the same state as VirtReg. - // However, if VirtReg has tied defs and Intf doesn't, then + for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { + // If Intf is done and sits on the same register class as VirtReg, it + // would not be recolorable as it is in the same state as + // VirtReg. However there are at least two exceptions. + // + // If VirtReg has tied defs and Intf doesn't, then // there is still a point in examining if it can be recolorable. + // + // Additionally, if the register class has overlapping tuple members, it + // may still be recolorable using a different tuple. This is more likely + // if the existing assignment aliases with the candidate. + // if (((ExtraInfo->getStage(*Intf) == RS_Done && - MRI->getRegClass(Intf->reg()) == CurRC) && + MRI->getRegClass(Intf->reg()) == CurRC && + !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) && !(hasTiedDef(MRI, VirtReg.reg()) && !hasTiedDef(MRI, Intf->reg()))) || FixedRegisters.count(Intf->reg())) { @@ -2008,18 +1763,26 @@ bool RAGreedy::mayRecolorAllInterferences( /// (split, spill) during the process and that must be assigned. /// \p FixedRegisters contains all the virtual registers that cannot be /// recolored. +/// +/// \p RecolorStack tracks the original assignments of successfully recolored +/// registers. +/// /// \p Depth gives the current depth of the last chance recoloring. /// \return a physical register that can be used for VirtReg or ~0u if none /// exists. -unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, +unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg)) return ~0u; LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + + const ssize_t EntryStackSize = RecolorStack.size(); + // Ranges must be Done. 
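assignedRegPartiallyOverlaps() above opens a recoloring exception for register tuples: an interfering range that is RS_Done may still be worth recoloring if it currently sits on a register that merely aliases the candidate. A toy model of the check, with invented register units standing in for MCRegUnit and for TRI.regsOverlap():

#include <algorithm>
#include <cstdio>
#include <vector>

using Units = std::vector<int>; // the register units a physreg covers

static bool regsOverlap(const Units &A, const Units &B) {
  for (int U : A)
    if (std::find(B.begin(), B.end(), U) != B.end())
      return true;
  return false;
}

// True when the interfering range sits on a different but aliasing register,
// i.e. moving it to a non-aliasing tuple could free the candidate register.
static bool assignedRegPartiallyOverlaps(const Units &Candidate,
                                         const Units &Assigned) {
  if (Candidate == Assigned)
    return false; // identical assignment: recoloring gains nothing
  return regsOverlap(Candidate, Assigned);
}

int main() {
  Units D0_D1{0, 1}, D1_D2{1, 2}; // overlapping two-register tuples
  std::printf("%d\n", assignedRegPartiallyOverlaps(D0_D1, D1_D2)); // 1
  std::printf("%d\n", assignedRegPartiallyOverlaps(D0_D1, D0_D1)); // 0
}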
assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && "Last chance recoloring should really be last chance"); @@ -2035,9 +1798,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // Set of Live intervals that will need to be recolored. SmallLISet RecoloringCandidates; - // Record the original mapping virtual register to physical register in case - // the recoloring fails. - DenseMap<Register, MCRegister> VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored past this point in // this recoloring "session". assert(!FixedRegisters.count(VirtReg.reg())); @@ -2049,7 +1810,6 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " << printReg(PhysReg, TRI) << '\n'); RecoloringCandidates.clear(); - VirtRegToPhysReg.clear(); CurrentNewVRegs.clear(); // It is only possible to recolor virtual register interference. @@ -2069,18 +1829,19 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, continue; } - // RecoloringCandidates contains all the virtual registers that interfer - // with VirtReg on PhysReg (or one of its aliases). - // Enqueue them for recoloring and perform the actual recoloring. + // RecoloringCandidates contains all the virtual registers that interfere + // with VirtReg on PhysReg (or one of its aliases). Enqueue them for + // recoloring and perform the actual recoloring. PQueue RecoloringQueue; - for (LiveInterval *RC : RecoloringCandidates) { + for (const LiveInterval *RC : RecoloringCandidates) { Register ItVirtReg = RC->reg(); enqueue(RecoloringQueue, RC); assert(VRM->hasPhys(ItVirtReg) && "Interferences are supposed to be with allocated variables"); // Record the current allocation. - VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg))); + // unset the related struct. Matrix->unassign(*RC); } @@ -2095,7 +1856,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // at this point for the next physical register. SmallVirtRegSet SaveFixedRegisters(FixedRegisters); if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs, - FixedRegisters, Depth)) { + FixedRegisters, RecolorStack, Depth)) { // Push the queued vregs into the main queue. for (Register NewVReg : CurrentNewVRegs) NewVRegs.push_back(NewVReg); @@ -2122,13 +1883,31 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, NewVRegs.push_back(R); } - for (LiveInterval *RC : RecoloringCandidates) { - Register ItVirtReg = RC->reg(); - if (VRM->hasPhys(ItVirtReg)) - Matrix->unassign(*RC); - MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; - Matrix->assign(*RC, ItPhysReg); + // Roll back our unsuccessful recoloring. Also roll back any successful + // recolorings in any recursive recoloring attempts, since it's possible + // they would have introduced conflicts with assignments we will be + // restoring further up the stack. Perform all unassignments prior to + // reassigning, since sub-recolorings may have conflicted with the registers + // we are going to restore to their original assignments.
+ for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) { + const LiveInterval *LI; + MCRegister PhysReg; + std::tie(LI, PhysReg) = RecolorStack[I]; + + if (VRM->hasPhys(LI->reg())) + Matrix->unassign(*LI); } + + for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) { + const LiveInterval *LI; + MCRegister PhysReg; + std::tie(LI, PhysReg) = RecolorStack[I]; + if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg())) + Matrix->assign(*LI, PhysReg); + } + + // Pop the stack of recoloring attempts. + RecolorStack.resize(EntryStackSize); } // Last chance recoloring did not work either, give up. @@ -2146,12 +1925,13 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { while (!RecoloringQueue.empty()) { - LiveInterval *LI = dequeue(RecoloringQueue); + const LiveInterval *LI = dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - MCRegister PhysReg = - selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, + RecolorStack, Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, it is okay to continue the recoloring even // if we did not find an alternative color for it. Indeed, @@ -2178,12 +1958,14 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, +MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + RecoloringStack RecolorStack; + MCRegister Reg = + selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2208,10 +1990,9 @@ MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, /// Spilling a live range in the cold path can have lower cost than using /// the CSR for the first time. Returns the physical register if we decide /// to use the CSR; otherwise return 0. -MCRegister -RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, - MCRegister PhysReg, uint8_t &CostPerUseLimit, - SmallVectorImpl<Register> &NewVRegs) { +MCRegister RAGreedy::tryAssignCSRFirstTime( + const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, + uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) { if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. @@ -2243,7 +2024,7 @@ RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, return PhysReg; } -void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { +void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) { // Do not keep invalid information around.
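The rollback above is deliberately two-phase: every assignment recorded since EntryStackSize is unassigned first, and only then are the original assignments restored, because a restore could otherwise collide with a tentative recoloring a later stack entry still holds. A minimal sketch of the stack discipline (standalone C++; a std::map stands in for the VirtRegMap/LiveRegMatrix pair, so only the bookkeeping is modeled, not the interference checking that motivates the two phases):

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

using VReg = int;
using PhysReg = int;

int main() {
  std::map<VReg, PhysReg> VRM = {{10, 1}, {11, 2}}; // original colors
  std::vector<std::pair<VReg, PhysReg>> RecolorStack;
  const size_t EntryStackSize = RecolorStack.size();

  // A failed recoloring attempt: remember the originals, then swap colors.
  for (VReg R : {10, 11})
    RecolorStack.push_back({R, VRM[R]});
  VRM[10] = 2; // tentative recolorings that must now be undone
  VRM[11] = 1;

  // Phase 1: unassign everything recorded since EntryStackSize...
  for (size_t I = RecolorStack.size(); I-- > EntryStackSize;)
    VRM.erase(RecolorStack[I].first);
  // Phase 2: ...then restore the original assignments.
  for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I)
    VRM[RecolorStack[I].first] = RecolorStack[I].second;
  RecolorStack.resize(EntryStackSize); // pop this attempt's entries

  for (const auto &KV : VRM)
    std::printf("vreg %d -> physreg %d\n", KV.first, KV.second);
}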
SetOfBrokenHints.remove(&LI); } @@ -2317,7 +2098,7 @@ BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, /// For a given live range, profitability is determined by the sum of the /// frequencies of the non-identity copies it would introduce with the old /// and new register. -void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { +void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) { // We have a broken hint, check if it is possible to fix it by // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted // some register and PhysReg may be available for the other live-ranges. @@ -2431,7 +2212,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { /// It is likely that we can assign the same register for b, c, and d, /// getting rid of 2 copies. void RAGreedy::tryHintsRecoloring() { - for (LiveInterval *LI : SetOfBrokenHints) { + for (const LiveInterval *LI : SetOfBrokenHints) { assert(Register::isVirtualRegister(LI->reg()) && "Recoloring is possible only for virtual registers"); // Some dead defs may be around (e.g., because of debug uses). @@ -2442,9 +2223,10 @@ void RAGreedy::tryHintsRecoloring() { } } -MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, +MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg, SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters, + RecoloringStack &RecolorStack, unsigned Depth) { uint8_t CostPerUseLimit = uint8_t(~0u); // First try assigning a free register. @@ -2452,8 +2234,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix); if (MCRegister PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) { - // If VirtReg got an assignment, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. @@ -2488,9 +2268,6 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // copy-related live-ranges. if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); - // If VirtReg eviction someone, the eviction info for it as an evictee is - // no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); return PhysReg; } @@ -2510,18 +2287,16 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters); - if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { - // If VirtReg got split, the eviction info is no longer relevant. - LastEvicted.clearEvicteeInfo(VirtReg.reg()); + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) return PhysReg; - } } // If we couldn't allocate a register from spilling, there is probably some // invalid inline assembly. The base class will report it. - if (Stage >= RS_Done || !VirtReg.isSpillable()) + if (Stage >= RS_Done || !VirtReg.isSpillable()) { return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, - Depth); + RecolorStack, Depth); + } // Finally spill VirtReg itself.
if ((EnableDeferredSpilling || @@ -2713,19 +2488,27 @@ void RAGreedy::reportStats() { } } +bool RAGreedy::hasVirtRegAlloc() { + for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) { + Register Reg = Register::index2VirtReg(I); + if (MRI->reg_nodbg_empty(Reg)) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!RC) + continue; + if (ShouldAllocateClass(*TRI, *RC)) + return true; + } + + return false; +} + bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - RCI.runOnMachineFunction(mf); - - EnableAdvancedRASplitCost = - ConsiderLocalIntervalCost.getNumOccurrences() - ? ConsiderLocalIntervalCost - : MF->getSubtarget().enableAdvancedRASplitCost(); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2733,6 +2516,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>(), getAnalysis<LiveRegMatrix>()); + + // Early return if there is no virtual register to be allocated to a + // physical register. + if (!hasVirtRegAlloc()) + return false; + Indexes = &getAnalysis<SlotIndexes>(); MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ -2746,6 +2535,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); RegCosts = TRI->getRegisterCosts(*MF); + RegClassPriorityTrumpsGlobalness = + GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences() + ? GreedyRegClassPriorityTrumpsGlobalness + : TRI->regClassPriorityTrumpsGlobalness(*MF); ExtraInfo.emplace(); EvictAdvisor = @@ -2764,7 +2557,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. 
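hasVirtRegAlloc() above lets a filtered greedy run (one restricted by ShouldAllocateClass to a subset of register classes) return early from a function that has no virtual register it is responsible for. A compact sketch of the same scan over a simplified vreg table; every type and name here is a stand-in, not LLVM's API:

#include <cstdio>
#include <functional>
#include <vector>

struct VRegInfo {
  bool HasUses;   // models !MRI->reg_nodbg_empty(Reg)
  int RegClassId; // -1 models a vreg without a register class
};

bool hasVirtRegAlloc(const std::vector<VRegInfo> &VRegs,
                     const std::function<bool(int)> &ShouldAllocateClass) {
  for (const VRegInfo &V : VRegs) {
    if (!V.HasUses || V.RegClassId < 0)
      continue;
    if (ShouldAllocateClass(V.RegClassId))
      return true; // at least one vreg needs a physical register
  }
  return false; // nothing to allocate: skip the whole pass
}

int main() {
  std::vector<VRegInfo> VRegs{{true, 0}, {false, 1}};
  auto OnlyClass1 = [](int RC) { return RC == 1; };
  std::printf("%d\n", hasVirtRegAlloc(VRegs, OnlyClass1)); // 0: early return
}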
SetOfBrokenHints.clear(); - LastEvicted.clear(); allocatePhysRegs(); tryHintsRecoloring(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h index e9a5fe635f26..358e74541a54 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocGreedy.h @@ -12,9 +12,7 @@ #ifndef LLVM_CODEGEN_REGALLOCGREEDY_H_ #define LLVM_CODEGEN_REGALLOCGREEDY_H_ -#include "AllocationOrder.h" #include "InterferenceCache.h" -#include "LiveDebugVariables.h" #include "RegAllocBase.h" #include "RegAllocEvictionAdvisor.h" #include "SpillPlacement.h" @@ -23,52 +21,44 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" -#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/LiveStacks.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/Spiller.h" -#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Target/TargetMachine.h" #include <algorithm> -#include <cassert> #include <cstdint> #include <memory> #include <queue> -#include <tuple> #include <utility> namespace llvm { +class AllocationOrder; +class AnalysisUsage; +class EdgeBundles; +class LiveDebugVariables; +class LiveIntervals; +class LiveRegMatrix; +class MachineBasicBlock; +class MachineBlockFrequencyInfo; +class MachineDominatorTree; +class MachineLoop; +class MachineLoopInfo; +class MachineOptimizationRemarkEmitter; +class MachineOptimizationRemarkMissed; +class SlotIndex; +class SlotIndexes; +class TargetInstrInfo; +class VirtRegMap; + class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { @@ -162,15 +152,18 @@ public: private: // Convenient shortcuts. using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>; - using SmallLISet = SmallPtrSet<LiveInterval *, 4>; + using SmallLISet = SmallPtrSet<const LiveInterval *, 4>; + + // We need to track all tentative recolorings so we can roll back any + // successful and unsuccessful recoloring attempts. + using RecoloringStack = + SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>; // context MachineFunction *MF; // Shortcuts to some useful interface. 
const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RCI; // analyses SlotIndexes *Indexes; @@ -210,57 +203,6 @@ private: static const char *const StageName[]; #endif - /// EvictionTrack - Keeps track of past evictions in order to optimize region - /// split decision. - class EvictionTrack { - - public: - using EvictorInfo = - std::pair<Register /* evictor */, MCRegister /* physreg */>; - using EvicteeInfo = llvm::DenseMap<Register /* evictee */, EvictorInfo>; - - private: - /// Each Vreg that has been evicted in the last stage of selectOrSplit will - /// be mapped to the evictor Vreg and the PhysReg it was evicted from. - EvicteeInfo Evictees; - - public: - /// Clear all eviction information. - void clear() { Evictees.clear(); } - - /// Clear eviction information for the given evictee Vreg. - /// E.g. when Vreg get's a new allocation, the old eviction info is no - /// longer relevant. - /// \param Evictee The evictee Vreg for whom we want to clear collected - /// eviction info. - void clearEvicteeInfo(Register Evictee) { Evictees.erase(Evictee); } - - /// Track new eviction. - /// The Evictor vreg has evicted the Evictee vreg from Physreg. - /// \param PhysReg The physical register Evictee was evicted from. - /// \param Evictor The evictor Vreg that evicted Evictee. - /// \param Evictee The evictee Vreg. - void addEviction(MCRegister PhysReg, Register Evictor, Register Evictee) { - Evictees[Evictee].first = Evictor; - Evictees[Evictee].second = PhysReg; - } - - /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. - /// \param Evictee The evictee vreg. - /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if - /// nobody has evicted Evictee from PhysReg. - EvictorInfo getEvictor(Register Evictee) { - if (Evictees.count(Evictee)) { - return Evictees[Evictee]; - } - - return EvictorInfo(0, 0); - } - }; - - // Keeps track of past evictions in order to optimize region split decision. - EvictionTrack LastEvicted; - // splitting state. std::unique_ptr<SplitAnalysis> SA; std::unique_ptr<SplitEditor> SE; @@ -320,17 +262,17 @@ private: /// Callee-save register cost, calculated once per machine function. BlockFrequency CSRCost; - /// Enable or not the consideration of the cost of local intervals created - /// by a split candidate when choosing the best split candidate. - bool EnableAdvancedRASplitCost; - /// Set of broken hints that may be reconciled later because of eviction. - SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + SmallSetVector<const LiveInterval *, 8> SetOfBrokenHints; /// The register cost values. This list will be recreated for each Machine /// Function ArrayRef<uint8_t> RegCosts; + /// Flags for the live range priority calculation, determined once per + /// machine function. + bool RegClassPriorityTrumpsGlobalness; + public: RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); @@ -341,11 +283,11 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; void releaseMemory() override; Spiller &spiller() override { return *SpillerInstance; } - void enqueueImpl(LiveInterval *LI) override; - LiveInterval *dequeue() override; - MCRegister selectOrSplit(LiveInterval &, + void enqueueImpl(const LiveInterval *LI) override; + const LiveInterval *dequeue() override; + MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl<Register> &) override; - void aboutToRemoveInterval(LiveInterval &) override; + void aboutToRemoveInterval(const LiveInterval &) override; /// Perform register allocation. 
bool runOnMachineFunction(MachineFunction &mf) override; @@ -363,81 +305,70 @@ public: static char ID; private: - MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned = 0); + MCRegister selectOrSplitImpl(const LiveInterval &, + SmallVectorImpl<Register> &, SmallVirtRegSet &, + RecoloringStack &, unsigned = 0); bool LRE_CanEraseVirtReg(Register) override; void LRE_WillShrinkVirtReg(Register) override; void LRE_DidCloneVirtReg(Register, Register) override; - void enqueue(PQueue &CurQueue, LiveInterval *LI); - LiveInterval *dequeue(PQueue &CurQueue); + void enqueue(PQueue &CurQueue, const LiveInterval *LI); + const LiveInterval *dequeue(PQueue &CurQueue); + bool hasVirtRegAlloc(); BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency &); bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); bool growRegion(GlobalSplitCandidate &Cand); - bool splitCanCauseEvictionChain(Register Evictee, GlobalSplitCandidate &Cand, - unsigned BBNumber, - const AllocationOrder &Order); - bool splitCanCauseLocalSpill(unsigned VirtRegToSplit, - GlobalSplitCandidate &Cand, unsigned BBNumber, - const AllocationOrder &Order); BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, - const AllocationOrder &Order, - bool *CanCauseEvictionChain); + const AllocationOrder &Order); bool calcCompactRegion(GlobalSplitCandidate &); void splitAroundRegion(LiveRangeEdit &, ArrayRef<unsigned>); void calcGapWeights(MCRegister, SmallVectorImpl<float> &); - bool canEvictInterferenceInRange(const LiveInterval &VirtReg, - MCRegister PhysReg, SlotIndex Start, - SlotIndex End, EvictionCost &MaxCost) const; - MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order, - const LiveInterval &VirtReg, - SlotIndex Start, SlotIndex End, - float *BestEvictWeight) const; - void evictInterference(LiveInterval &, MCRegister, + void evictInterference(const LiveInterval &, MCRegister, SmallVectorImpl<Register> &); - bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg, + bool mayRecolorAllInterferences(MCRegister PhysReg, + const LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); - MCRegister tryAssign(LiveInterval &, AllocationOrder &, + MCRegister tryAssign(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &, const SmallVirtRegSet &); - MCRegister tryEvict(LiveInterval &, AllocationOrder &, + MCRegister tryEvict(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &, uint8_t, const SmallVirtRegSet &); - MCRegister tryRegionSplit(LiveInterval &, AllocationOrder &, + MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); /// Calculate cost of region splitting. - unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + unsigned calculateRegionSplitCost(const LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR, - bool *CanCauseEvictionChain = nullptr); + unsigned &NumCands, bool IgnoreCSR); /// Perform region splitting. - unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, SmallVectorImpl<Register> &NewVRegs); /// Check other options before using a callee-saved register for the first /// time. 
- MCRegister tryAssignCSRFirstTime(LiveInterval &VirtReg, + MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg, uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs); void initializeCSRCost(); - unsigned tryBlockSplit(LiveInterval &, AllocationOrder &, + unsigned tryBlockSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); - unsigned tryInstructionSplit(LiveInterval &, AllocationOrder &, + unsigned tryInstructionSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); - unsigned tryLocalSplit(LiveInterval &, AllocationOrder &, + unsigned tryLocalSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &); - unsigned trySplit(LiveInterval &, AllocationOrder &, + unsigned trySplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &, const SmallVirtRegSet &); - unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + unsigned tryLastChanceRecoloring(const LiveInterval &, AllocationOrder &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned); + SmallVirtRegSet &, RecoloringStack &, + unsigned); bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<Register> &, - SmallVirtRegSet &, unsigned); - void tryHintRecoloring(LiveInterval &); + SmallVirtRegSet &, RecoloringStack &, unsigned); + void tryHintRecoloring(const LiveInterval &); void tryHintsRecoloring(); /// Model the information carried by one end of a copy. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp index 93be8f689d57..8c262130fb70 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -847,6 +847,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { while (!PBQPAllocComplete) { LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + (void) Round; PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); initializeGraph(G, VRM, *VRegSpiller); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp index 740890831617..32fa5e07dd16 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #include "RegAllocScore.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/ADT/ilist_iterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include <cassert> -#include <cstdint> -#include <numeric> -#include <vector> +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; cl::opt<double> CopyWeight("regalloc-copy-weight", cl::init(0.2), cl::Hidden); diff --git 
a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h index 3c28bb61189d..2bcd0b5895bf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegAllocScore.h @@ -15,21 +15,16 @@ #ifndef LLVM_CODEGEN_REGALLOCSCORE_H_ #define LLVM_CODEGEN_REGALLOCSCORE_H_ -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/Utils/TFUtils.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/IR/Module.h" -#include <cassert> -#include <cstdint> -#include <limits> +#include "llvm/ADT/STLFunctionalExtras.h" namespace llvm { +class AAResults; +class MachineBasicBlock; +class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineInstr; + /// Regalloc score. class RegAllocScore final { double CopyCounts = 0.0; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 5a79ac44dcf4..16afd15e29e4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -17,16 +17,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/TargetFrameLowering.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index 800d952469a5..d356962e0d78 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -19,8 +19,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" @@ -29,7 +29,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp index 5c4d18ad79c5..512b21aeacaf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBank.cpp @@ -9,7 +9,7 @@ /// This file implements the RegisterBank class. 
//===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/RegisterBank.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp index 650500c7eb31..de851ffc7fdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterBankInfo.cpp @@ -9,20 +9,17 @@ /// This file implements the RegisterBankInfo class. //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterBank.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp index 65a65b9cae95..374fcc9a6014 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -44,9 +43,11 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; MF = &mf; + auto &STI = MF->getSubtarget(); + // Allocate new array the first time we see a new target. - if (MF->getSubtarget().getRegisterInfo() != TRI) { - TRI = MF->getSubtarget().getRegisterInfo(); + if (STI.getRegisterInfo() != TRI) { + TRI = STI.getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); Update = true; } @@ -68,6 +69,18 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } CalleeSavedRegs = CSR; + // Even if the CSR list is the same, we could have had a different allocation order + // if ignoreCSRForAllocationOrder is evaluated differently. + BitVector CSRHintsForAllocOrder(TRI->getNumRegs()); + for (const MCPhysReg *I = CSR; *I; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + CSRHintsForAllocOrder[*AI] = STI.ignoreCSRForAllocationOrder(mf, *AI); + if (IgnoreCSRForAllocOrder.size() != CSRHintsForAllocOrder.size() || + IgnoreCSRForAllocOrder != CSRHintsForAllocOrder) { + Update = true; + IgnoreCSRForAllocOrder = CSRHintsForAllocOrder; + } + RegCosts = TRI->getRegisterCosts(*MF); // Different reserved registers?
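The RegisterClassInfo hunk above caches a per-register BitVector of ignoreCSRForAllocationOrder answers and forces the cached allocation order to be recomputed whenever any answer changes. A minimal sketch of a target-side override, assuming a hypothetical MyTargetSubtarget and an invented cold-function policy (the hook name and its arguments are inferred from the call site in the hunk, STI.ignoreCSRForAllocationOrder(mf, *AI)):

    // MyTargetSubtarget.cpp (hypothetical backend)
    bool MyTargetSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                                        unsigned PhysReg) const {
      // Illustrative policy only: treat callee-saved registers like ordinary
      // registers inside functions marked cold.
      return MF.getFunction().hasFnAttribute(Attribute::Cold);
    }

Because the answer may differ per function, the BitVector comparison in the hunk is what keeps the cached order correct across functions even when the CSR list itself is unchanged.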
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp index a917b0d27d4a..930d05324440 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1647,7 +1647,7 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { MachineOperand &MO = CopyMI->getOperand(i-1); if (MO.isReg() && MO.isUse()) - CopyMI->RemoveOperand(i-1); + CopyMI->removeOperand(i-1); } LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " "implicit def\n"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp index 424ad7419165..289d31be2d2d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -37,11 +37,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <algorithm> #include <cassert> #include <iterator> #include <limits> -#include <string> #include <utility> using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp index 6858d7233bc5..9d9cdf9edbb3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -22,8 +22,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include <algorithm> -#include <cassert> #include <cstdint> #include <utility> #include <vector> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp index 49859aeec78b..01886e40a4a3 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -12,13 +12,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" /// \file RemoveRedundantDebugValues.cpp /// diff --git a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 0872ec303460..466022ae0ac1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -33,9 +33,9 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 0ff045fa787e..87b8ac59bdba 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ 
b/contrib/llvm-project/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -1,4 +1,4 @@ -//=== ReplaceWithVeclib.cpp - Replace vector instrinsics with veclib calls ===// +//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -23,7 +23,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -110,7 +109,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, auto *ArgType = Arg.value()->getType(); // Vector calls to intrinsics can still have // scalar operands for specific arguments. - if (hasVectorInstrinsicScalarOpd(IntrinsicID, Arg.index())) { + if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, Arg.index())) { ScalarTypes.push_back(ArgType); } else { // The argument in this place should be a vector if diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp index 3d8a7eecce18..e7116ec3ea28 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStack.cpp @@ -17,7 +17,6 @@ #include "SafeStackLayout.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -49,10 +48,10 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" -#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -97,31 +96,12 @@ static cl::opt<bool> SafeStackUsePointerAddress("safestack-use-pointer-address", cl::init(false), cl::Hidden); -// Disabled by default due to PR32143. static cl::opt<bool> ClColoring("safe-stack-coloring", cl::desc("enable safe stack coloring"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); namespace { -/// Rewrite an SCEV expression for a memory access address to an expression that -/// represents offset from the given alloca. -/// -/// The implementation simply replaces all mentions of the alloca with zero. -class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> { - const Value *AllocaPtr; - -public: - AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) - : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (Expr->getValue() == AllocaPtr) - return SE.getZero(Expr->getType()); - return Expr; - } -}; - /// The SafeStack pass splits the stack of each function into the safe /// stack, which is only accessed through memory safe dereferences (as /// determined statically), and the unsafe stack, which contains all @@ -147,7 +127,7 @@ class SafeStack { /// /// 16 seems like a reasonable upper bound on the alignment of objects that we /// might expect to appear on the stack on most common targets. - static constexpr uint64_t StackAlignment = 16; + static constexpr Align StackAlignment = Align::Constant<16>(); /// Return the value of the stack canary. 
Value *getStackGuard(IRBuilder<> &IRB, Function &F); @@ -221,7 +201,7 @@ public: bool run(); }; -constexpr uint64_t SafeStack::StackAlignment; +constexpr Align SafeStack::StackAlignment; uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); @@ -236,9 +216,18 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, const Value *AllocaPtr, uint64_t AllocaSize) { - AllocaOffsetRewriter Rewriter(SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); + const SCEV *AddrExpr = SE.getSCEV(Addr); + const auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrExpr)); + if (!Base || Base->getValue() != AllocaPtr) { + LLVM_DEBUG( + dbgs() << "[SafeStack] " + << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") + << *AllocaPtr << "\n" + << "SCEV " << *AddrExpr << " not directly based on alloca\n"); + return false; + } + const SCEV *Expr = SE.removePointerBase(AddrExpr); uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType()); ConstantRange AccessStartRange = SE.getUnsignedRange(Expr); ConstantRange SizeRange = @@ -645,6 +634,13 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // FIXME: no need to update BasePointer in leaf functions. unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment); + MDBuilder MDB(F.getContext()); + SmallVector<Metadata *, 2> Data; + Data.push_back(MDB.createString("unsafe-stack-size")); + Data.push_back(MDB.createConstant(ConstantInt::get(Int32Ty, FrameSize))); + MDNode *MD = MDTuple::get(F.getContext(), Data); + F.setMetadata(LLVMContext::MD_annotation, MD); + // Update shadow stack pointer in the function epilogue. IRB.SetInsertPoint(BasePointer->getNextNode()); @@ -677,13 +673,12 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( SP = IRB.CreateSub(SP, Size); // Align the SP value to satisfy the AllocaInst, type and stack alignments. - uint64_t Align = - std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()), - StackAlignment); + auto Align = std::max(std::max(DL.getPrefTypeAlign(Ty), AI->getAlign()), + StackAlignment); - assert(isPowerOf2_32(Align)); Value *NewTop = IRB.CreateIntToPtr( - IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))), + IRB.CreateAnd(SP, + ConstantInt::get(IntPtrTy, ~uint64_t(Align.value() - 1))), StackPtrTy); // Save the stack pointer. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp index 602afcfa9001..f821145f4b63 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -11,7 +11,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h index 4ac7af2059f5..6126c7a67854 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SafeStackLayout.h @@ -52,7 +52,7 @@ class StackLayout { void layoutObject(StackObject &Obj); public: - StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {} + StackLayout(Align StackAlignment) : MaxAlignment(StackAlignment) {} /// Add an object to the stack frame. 
Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 0e8e8338b46d..07dcc34fbf15 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,7 +14,6 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/iterator_range.h" @@ -40,9 +39,6 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/LaneBitmask.h" @@ -65,9 +61,9 @@ using namespace llvm; #define DEBUG_TYPE "machine-scheduler" -static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, - cl::ZeroOrMore, cl::init(false), - cl::desc("Enable use of AA during MI DAG construction")); +static cl::opt<bool> + EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, + cl::desc("Enable use of AA during MI DAG construction")); static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 05b2a3764cca..e7b14944acfe 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -10,13 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp new file mode 100644 index 000000000000..c199b6a6cca8 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -0,0 +1,989 @@ +//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass converts selects to conditional jumps when profitable. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/ScaledNumber.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" +#include <algorithm> +#include <memory> +#include <queue> +#include <stack> +#include <string> + +using namespace llvm; + +#define DEBUG_TYPE "select-optimize" + +STATISTIC(NumSelectOptAnalyzed, + "Number of select groups considered for conversion to branch"); +STATISTIC(NumSelectConvertedExpColdOperand, + "Number of select groups converted due to expensive cold operand"); +STATISTIC(NumSelectConvertedHighPred, + "Number of select groups converted due to high-predictability"); +STATISTIC(NumSelectUnPred, + "Number of select groups not converted due to unpredictability"); +STATISTIC(NumSelectColdBB, + "Number of select groups not converted due to cold basic block"); +STATISTIC(NumSelectConvertedLoop, + "Number of select groups converted due to loop-level analysis"); +STATISTIC(NumSelectsConverted, "Number of selects converted"); + +static cl::opt<unsigned> ColdOperandThreshold( + "cold-operand-threshold", + cl::desc("Maximum frequency of path for an operand to be considered cold."), + cl::init(20), cl::Hidden); + +static cl::opt<unsigned> ColdOperandMaxCostMultiplier( + "cold-operand-max-cost-multiplier", + cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " + "slice of a cold operand to be considered inexpensive."), + cl::init(1), cl::Hidden); + +static cl::opt<unsigned> + GainGradientThreshold("select-opti-loop-gradient-gain-threshold", + cl::desc("Gradient gain threshold (%)."), + cl::init(25), cl::Hidden); + +static cl::opt<unsigned> + GainCycleThreshold("select-opti-loop-cycle-gain-threshold", + cl::desc("Minimum gain per loop (in cycles) threshold."), + cl::init(4), cl::Hidden); + +static cl::opt<unsigned> GainRelativeThreshold( + "select-opti-loop-relative-gain-threshold", + cl::desc( + "Minimum relative gain per loop threshold (1/X). 
Defaults to 12.5%"), + cl::init(8), cl::Hidden); + +static cl::opt<unsigned> MispredictDefaultRate( + "mispredict-default-rate", cl::Hidden, cl::init(25), + cl::desc("Default mispredict rate (initialized to 25%).")); + +static cl::opt<bool> + DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, + cl::init(false), + cl::desc("Disable loop-level heuristics.")); + +namespace { + +class SelectOptimize : public FunctionPass { + const TargetMachine *TM = nullptr; + const TargetSubtargetInfo *TSI; + const TargetLowering *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; + const LoopInfo *LI; + DominatorTree *DT; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; + ProfileSummaryInfo *PSI; + OptimizationRemarkEmitter *ORE; + TargetSchedModel TSchedModel; + +public: + static char ID; + + SelectOptimize() : FunctionPass(ID) { + initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); + } + +private: + // Select groups consist of consecutive select instructions with the same + // condition. + using SelectGroup = SmallVector<SelectInst *, 2>; + using SelectGroups = SmallVector<SelectGroup, 2>; + + using Scaled64 = ScaledNumber<uint64_t>; + + struct CostInfo { + /// Predicated cost (with selects as conditional moves). + Scaled64 PredCost; + /// Non-predicated cost (with selects converted to branches). + Scaled64 NonPredCost; + }; + + // Converts select instructions of a function to conditional jumps when deemed + // profitable. Returns true if at least one select was converted. + bool optimizeSelects(Function &F); + + // Heuristics for determining which select instructions can be profitably + // converted to branches. Separate heuristics for selects in inner-most loops + // and the rest of code regions (base heuristics for non-inner-most loop + // regions). + void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); + void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); + + // Converts to branches the select groups that were deemed + // profitable-to-convert. + void convertProfitableSIGroups(SelectGroups &ProfSIGroups); + + // Splits selects of a given basic block into select groups. + void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); + + // Determines for which select groups it is profitable converting to branches + // (base and inner-most-loop heuristics). + void findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, + SelectGroups &ProfSIGroups); + + // Determines if a select group should be converted to a branch (base + // heuristics). + bool isConvertToBranchProfitableBase(const SmallVector<SelectInst *, 2> &ASI); + + // Returns true if there are expensive instructions in the cold value + // operand's (if any) dependence slice of any of the selects of the given + // group.
+ bool hasExpensiveColdOperand(const SmallVector<SelectInst *, 2> &ASI); + + // For a given source instruction, collect its backwards dependence slice + // consisting of instructions exclusively computed for producing the operands + // of the source instruction. + void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, + bool ForSinking = false); + + // Returns true if the condition of the select is highly predictable. + bool isSelectHighlyPredictable(const SelectInst *SI); + + // Loop-level checks to determine if a non-predicated version (with branches) + // of the given loop is more profitable than its predicated version. + bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); + + // Computes instruction and loop-critical-path costs for both the predicated + // and non-predicated version of the given loop. + bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, + DenseMap<const Instruction *, CostInfo> &InstCostMap, + CostInfo *LoopCost); + + // Returns a set of all the select instructions in the given select groups. + SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups); + + // Returns the latency cost of a given instruction. + Optional<uint64_t> computeInstCost(const Instruction *I); + + // Returns the misprediction cost of a given select when converted to branch. + Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); + + // Returns the cost of a branch when the prediction is correct. + Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, + const SelectInst *SI); + + // Returns true if the target architecture supports lowering a given select. + bool isSelectKindSupported(SelectInst *SI); +}; +} // namespace + +char SelectOptimize::ID = 0; + +INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, + false) + +FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } + +bool SelectOptimize::runOnFunction(Function &F) { + TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); + TSI = TM->getSubtargetImpl(F); + TLI = TSI->getTargetLowering(); + + // If none of the select types is supported then skip this pass. + // This is an optimization pass. Legality issues will be handled by + // instruction selection. + if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && + !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && + !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) + return false; + + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + BPI.reset(new BranchProbabilityInfo(F, *LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + TSchedModel.init(TSI); + + // When optimizing for size, selects are preferable over branches. 
+ if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) + return false; + + return optimizeSelects(F); +} + +bool SelectOptimize::optimizeSelects(Function &F) { + // Determine for which select groups it is profitable converting to branches. + SelectGroups ProfSIGroups; + // Base heuristics apply only to non-loops and outer loops. + optimizeSelectsBase(F, ProfSIGroups); + // Separate heuristics for inner-most loops. + optimizeSelectsInnerLoops(F, ProfSIGroups); + + // Convert to branches the select groups that were deemed + // profitable-to-convert. + convertProfitableSIGroups(ProfSIGroups); + + // Code modified if at least one select group was converted. + return !ProfSIGroups.empty(); +} + +void SelectOptimize::optimizeSelectsBase(Function &F, + SelectGroups &ProfSIGroups) { + // Collect all the select groups. + SelectGroups SIGroups; + for (BasicBlock &BB : F) { + // Base heuristics apply only to non-loops and outer loops. + Loop *L = LI->getLoopFor(&BB); + if (L && L->isInnermost()) + continue; + collectSelectGroups(BB, SIGroups); + } + + // Determine for which select groups it is profitable converting to branches. + findProfitableSIGroupsBase(SIGroups, ProfSIGroups); +} + +void SelectOptimize::optimizeSelectsInnerLoops(Function &F, + SelectGroups &ProfSIGroups) { + SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); + // Need to check size on each iteration as we accumulate child loops. + for (unsigned long i = 0; i < Loops.size(); ++i) + for (Loop *ChildL : Loops[i]->getSubLoops()) + Loops.push_back(ChildL); + + for (Loop *L : Loops) { + if (!L->isInnermost()) + continue; + + SelectGroups SIGroups; + for (BasicBlock *BB : L->getBlocks()) + collectSelectGroups(*BB, SIGroups); + + findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); + } +} + +/// If \p isTrue is true, return the true value of \p SI, otherwise return +/// false value of \p SI. If the true/false value of \p SI is defined by any +/// select instructions in \p Selects, look through the defining select +/// instruction until the true/false value is not defined in \p Selects. +static Value * +getTrueOrFalseValue(SelectInst *SI, bool isTrue, + const SmallPtrSet<const Instruction *, 2> &Selects) { + Value *V = nullptr; + for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); + DefSI = dyn_cast<SelectInst>(V)) { + assert(DefSI->getCondition() == SI->getCondition() && + "The condition of DefSI does not match with SI"); + V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); + } + assert(V && "Failed to get select true/false value"); + return V; +} + +void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : ProfSIGroups) { + // The code transformation here is a modified version of the sinking + // transformation in CodeGenPrepare::optimizeSelectInst with a more + // aggressive strategy of which instructions to sink. + // + // TODO: eliminate the redundancy of logic transforming selects to branches + // by removing CodeGenPrepare::optimizeSelectInst and optimizing here + // selects for all cases (with and without profile information). 
+ + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // %cmp.frozen = freeze %cmp + // br i1 %cmp.frozen, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // %cmp should be frozen, otherwise it may introduce undefined behavior. + // In addition, we may sink instructions that produce %c or %d into the + // destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + + // Find all the instructions that can be soundly sunk to the true/false + // blocks. These are instructions that are computed solely for producing the + // operands of the select instructions in the group and can be sunk without + // breaking the semantics of the LLVM IR (e.g., cannot sink instructions + // with side effects). + SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices; + typedef std::stack<Instruction *>::size_type StackSizeType; + StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; + for (SelectInst *SI : ASI) { + // For each select, compute the sinkable dependence chains of the true and + // false operands. + if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) { + std::stack<Instruction *> TrueSlice; + getExclBackwardsSlice(TI, TrueSlice, true); + maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); + TrueSlices.push_back(TrueSlice); + } + if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) { + std::stack<Instruction *> FalseSlice; + getExclBackwardsSlice(FI, FalseSlice, true); + maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); + FalseSlices.push_back(FalseSlice); + } + } + // In the case of multiple select instructions in the same group, the order + // of non-dependent instructions (instructions of different dependence + // slices) in the true/false blocks appears to affect performance. + // Interleaving the slices seems to experimentally be the optimal approach. + // This interleaving scheduling allows for more ILP (with a natural downside + // of increasing a bit register pressure) compared to a simple ordering of + // one whole chain after another. One would expect that this ordering would + // not matter since the scheduling in the backend of the compiler would + // take care of it, but apparently the scheduler fails to deliver optimal + // ILP with a naive ordering here. + SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved; + for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) { + for (auto &S : TrueSlices) { + if (!S.empty()) { + TrueSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } + for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) { + for (auto &S : FalseSlices) { + if (!S.empty()) { + FalseSlicesInterleaved.push_back(S.top()); + S.pop(); + } + } + } + + // We split the block containing the select(s) into two blocks. 
+ SelectInst *SI = ASI.front(); + SelectInst *LastSI = ASI.back(); + BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency()); + // Delete the unconditional branch that was just created by the split. + StartBlock->getTerminator()->eraseFromParent(); + + // Move any debug/pseudo instructions that were in-between the select + // group to the newly-created end block. + SmallVector<Instruction *, 2> DebugPseudoINS; + auto DIt = SI->getIterator(); + while (&*DIt != LastSI) { + if (DIt->isDebugOrPseudoInst()) + DebugPseudoINS.push_back(&*DIt); + DIt++; + } + for (auto DI : DebugPseudoINS) { + DI->moveBefore(&*EndBlock->getFirstInsertionPt()); + } + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; + BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; + if (!TrueSlicesInterleaved.empty()) { + TrueBlock = BasicBlock::Create(LastSI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + TrueBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *TrueInst : TrueSlicesInterleaved) + TrueInst->moveBefore(TrueBranch); + } + if (!FalseSlicesInterleaved.empty()) { + FalseBlock = BasicBlock::Create(LastSI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(LastSI->getDebugLoc()); + for (Instruction *FalseInst : FalseSlicesInterleaved) + FalseInst->moveBefore(FalseBranch); + } + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + FalseBranch->setDebugLoc(SI->getDebugLoc()); + } + + // Insert the real conditional branch based on the original condition. + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + BasicBlock *TT, *FT; + if (TrueBlock == nullptr) { + TT = EndBlock; + FT = FalseBlock; + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + TT = TrueBlock; + FT = EndBlock; + FalseBlock = StartBlock; + } else { + TT = TrueBlock; + FT = FalseBlock; + } + IRBuilder<> IB(SI); + auto *CondFr = + IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen"); + IB.CreateCondBr(CondFr, TT, FT, SI); + + SmallPtrSet<const Instruction *, 2> INS; + INS.insert(ASI.begin(), ASI.end()); + // Use reverse iterator because later select may use the value of the + // earlier select, and we need to propagate value through earlier select + // to get the PHI operand. + for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { + SelectInst *SI = *It; + // The select itself is replaced with a PHI Node. 
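+ // The incoming values are routed through TrueBlock/FalseBlock; when
+ // nothing was sunk on a side, that side was rewritten above to StartBlock.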
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); + PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); + PN->setDebugLoc(SI->getDebugLoc()); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + INS.erase(SI); + ++NumSelectsConverted; + } + } +} + +void SelectOptimize::collectSelectGroups(BasicBlock &BB, + SelectGroups &SIGroups) { + BasicBlock::iterator BBIt = BB.begin(); + while (BBIt != BB.end()) { + Instruction *I = &*BBIt++; + if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + SelectGroup SIGroup; + SIGroup.push_back(SI); + while (BBIt != BB.end()) { + Instruction *NI = &*BBIt; + SelectInst *NSI = dyn_cast<SelectInst>(NI); + if (NSI && SI->getCondition() == NSI->getCondition()) { + SIGroup.push_back(NSI); + } else if (!NI->isDebugOrPseudoInst()) { + // Debug/pseudo instructions should be skipped and not prevent the + // formation of a select group. + break; + } + ++BBIt; + } + + // If the select type is not supported, no point optimizing it. + // Instruction selection will take care of it. + if (!isSelectKindSupported(SI)) + continue; + + SIGroups.push_back(SIGroup); + } + } +} + +void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, + SelectGroups &ProfSIGroups) { + for (SelectGroup &ASI : SIGroups) { + ++NumSelectOptAnalyzed; + if (isConvertToBranchProfitableBase(ASI)) + ProfSIGroups.push_back(ASI); + } +} + +void SelectOptimize::findProfitableSIGroupsInnerLoops( + const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { + NumSelectOptAnalyzed += SIGroups.size(); + // For each select group in an inner-most loop, + // a branch is more preferable than a select/conditional-move if: + // i) conversion to branches for all the select groups of the loop satisfies + // loop-level heuristics including reducing the loop's critical path by + // some threshold (see SelectOptimize::checkLoopHeuristics); and + // ii) the total cost of the select group is cheaper with a branch compared + // to its predicated version. The cost is in terms of latency and the cost + // of a select group is the cost of its most expensive select instruction + // (assuming infinite resources and thus fully leveraging available ILP). + + DenseMap<const Instruction *, CostInfo> InstCostMap; + CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, + {Scaled64::getZero(), Scaled64::getZero()}}; + if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || + !checkLoopHeuristics(L, LoopCost)) { + return; + } + + for (SelectGroup &ASI : SIGroups) { + // Assuming infinite resources, the cost of a group of instructions is the + // cost of the most expensive instruction of the group. + Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); + for (SelectInst *SI : ASI) { + SelectCost = std::max(SelectCost, InstCostMap[SI].PredCost); + BranchCost = std::max(BranchCost, InstCostMap[SI].NonPredCost); + } + if (BranchCost < SelectCost) { + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front()); + OR << "Profitable to convert to branch (loop analysis). BranchCost=" + << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() + << ". "; + ORE->emit(OR); + ++NumSelectConvertedLoop; + ProfSIGroups.push_back(ASI); + } else { + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); + ORmiss << "Select is more profitable (loop analysis). 
BranchCost=" + << BranchCost.toString() + << ", SelectCost=" << SelectCost.toString() << ". "; + ORE->emit(ORmiss); + } + } +} + +bool SelectOptimize::isConvertToBranchProfitableBase( + const SmallVector<SelectInst *, 2> &ASI) { + SelectInst *SI = ASI.front(); + OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); + + // Skip cold basic blocks. Better to optimize for size for cold blocks. + if (PSI->isColdBlock(SI->getParent(), BFI.get())) { + ++NumSelectColdBB; + ORmiss << "Not converted to branch because of cold basic block. "; + ORE->emit(ORmiss); + return false; + } + + // If unpredictable, branch form is less profitable. + if (SI->getMetadata(LLVMContext::MD_unpredictable)) { + ++NumSelectUnPred; + ORmiss << "Not converted to branch because of unpredictable branch. "; + ORE->emit(ORmiss); + return false; + } + + // If highly predictable, branch form is more profitable, unless a + // predictable select is inexpensive in the target architecture. + if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { + ++NumSelectConvertedHighPred; + OR << "Converted to branch because of highly predictable branch. "; + ORE->emit(OR); + return true; + } + + // Look for expensive instructions in the cold operand's (if any) dependence + // slice of any of the selects in the group. + if (hasExpensiveColdOperand(ASI)) { + ++NumSelectConvertedExpColdOperand; + OR << "Converted to branch because of expensive cold operand."; + ORE->emit(OR); + return true; + } + + ORmiss << "Not profitable to convert to branch (base heuristic)."; + ORE->emit(ORmiss); + return false; +} + +static InstructionCost divideNearest(InstructionCost Numerator, + uint64_t Denominator) { + return (Numerator + (Denominator / 2)) / Denominator; +} + +bool SelectOptimize::hasExpensiveColdOperand( + const SmallVector<SelectInst *, 2> &ASI) { + bool ColdOperand = false; + uint64_t TrueWeight, FalseWeight, TotalWeight; + if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t MinWeight = std::min(TrueWeight, FalseWeight); + TotalWeight = TrueWeight + FalseWeight; + // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? + ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; + } else if (PSI->hasProfileSummary()) { + OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); + ORmiss << "Profile data available but missing branch-weights metadata for " + "select instruction. "; + ORE->emit(ORmiss); + } + if (!ColdOperand) + return false; + // Check if the cold path's dependence slice is expensive for any of the + // selects of the group. + for (SelectInst *SI : ASI) { + Instruction *ColdI = nullptr; + uint64_t HotWeight; + if (TrueWeight < FalseWeight) { + ColdI = dyn_cast<Instruction>(SI->getTrueValue()); + HotWeight = FalseWeight; + } else { + ColdI = dyn_cast<Instruction>(SI->getFalseValue()); + HotWeight = TrueWeight; + } + if (ColdI) { + std::stack<Instruction *> ColdSlice; + getExclBackwardsSlice(ColdI, ColdSlice); + InstructionCost SliceCost = 0; + while (!ColdSlice.empty()) { + SliceCost += TTI->getInstructionCost(ColdSlice.top(), + TargetTransformInfo::TCK_Latency); + ColdSlice.pop(); + } + // The colder the cold value operand of the select is the more expensive + // the cmov becomes for computing the cold value operand every time. Thus, + // the colder the cold operand is the more its cost counts. + // Get nearest integer cost adjusted for coldness. 
+ InstructionCost AdjSliceCost = + divideNearest(SliceCost * HotWeight, TotalWeight); + if (AdjSliceCost >= + ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) + return true; + } + } + return false; +} + +// For a given source instruction, collect its backwards dependence slice +// consisting of instructions exclusively computed for the purpose of producing +// the operands of the source instruction. As an approximation +// (sufficiently accurate in practice), we populate this set with the +// instructions of the backwards dependence slice that only have one use and +// form a one-use chain that leads to the source instruction. +void SelectOptimize::getExclBackwardsSlice(Instruction *I, + std::stack<Instruction *> &Slice, + bool ForSinking) { + SmallPtrSet<Instruction *, 2> Visited; + std::queue<Instruction *> Worklist; + Worklist.push(I); + while (!Worklist.empty()) { + Instruction *II = Worklist.front(); + Worklist.pop(); + + // Avoid cycles. + if (!Visited.insert(II).second) + continue; + + if (!II->hasOneUse()) + continue; + + // Cannot soundly sink instructions with side-effects. + // Terminator or phi instructions cannot be sunk. + // Avoid sinking other select instructions (should be handled separately). + if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || + isa<SelectInst>(II) || isa<PHINode>(II))) + continue; + + // Avoid considering instructions with less frequency than the source + // instruction (i.e., avoid colder code regions of the dependence slice). + if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) + continue; + + // Eligible one-use instruction added to the dependence slice. + Slice.push(II); + + // Explore all the operands of the current instruction to expand the slice. + for (unsigned k = 0; k < II->getNumOperands(); ++k) + if (auto *OpI = dyn_cast<Instruction>(II->getOperand(k))) + Worklist.push(OpI); + } +} + +bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TTI->getPredictableBranchThreshold()) + return true; + } + } + return false; +} + +bool SelectOptimize::checkLoopHeuristics(const Loop *L, + const CostInfo LoopCost[2]) { + // Loop-level checks to determine if a non-predicated version (with branches) + // of the loop is more profitable than its predicated version. + + if (DisableLoopLevelHeuristics) + return true; + + OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", + L->getHeader()->getFirstNonPHI()); + + if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || + LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { + ORmissL << "No select conversion in the loop due to no reduction of loop's " + "critical path. "; + ORE->emit(ORmissL); + return false; + } + + Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, + LoopCost[1].PredCost - LoopCost[1].NonPredCost}; + + // Profitably converting to branches needs to reduce the loop's critical path + // by at least some threshold (absolute gain of GainCycleThreshold cycles and + // relative gain of 12.5%).
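+ // For example, with the default thresholds, a loop whose predicated
+ // critical path costs 40 cycles across the two analyzed iterations must
+ // shrink by at least max(GainCycleThreshold, 40 / GainRelativeThreshold) =
+ // max(4, 5) = 5 cycles for conversion to be considered.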
+ if (Gain[1] < Scaled64::get(GainCycleThreshold) || + Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { + Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; + ORmissL << "No select conversion in the loop due to small reduction of " + "loop's critical path. Gain=" + << Gain[1].toString() + << ", RelativeGain=" << RelativeGain.toString() << "%. "; + ORE->emit(ORmissL); + return false; + } + + // If the loop's critical path involves loop-carried dependences, the gradient + // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). + // This check ensures that the latency reduction for the loop's critical path + // keeps decreasing with sufficient rate beyond the two analyzed loop + // iterations. + if (Gain[1] > Gain[0]) { + Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / + (LoopCost[1].PredCost - LoopCost[0].PredCost); + if (GradientGain < Scaled64::get(GainGradientThreshold)) { + ORmissL << "No select conversion in the loop due to small gradient gain. " + "GradientGain=" + << GradientGain.toString() << "%. "; + ORE->emit(ORmissL); + return false; + } + } + // If the gain decreases it is not profitable to convert. + else if (Gain[1] < Gain[0]) { + ORmissL + << "No select conversion in the loop due to negative gradient gain. "; + ORE->emit(ORmissL); + return false; + } + + // Non-predicated version of the loop is more profitable than its + // predicated version. + return true; +} + +// Computes instruction and loop-critical-path costs for both the predicated +// and non-predicated version of the given loop. +// Returns false if unable to compute these costs due to invalid cost of loop +// instruction(s). +bool SelectOptimize::computeLoopCosts( + const Loop *L, const SelectGroups &SIGroups, + DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { + const auto &SIset = getSIset(SIGroups); + // Compute instruction and loop-critical-path costs across two iterations for + // both predicated and non-predicated version. + const unsigned Iterations = 2; + for (unsigned Iter = 0; Iter < Iterations; ++Iter) { + // Cost of the loop's critical path. + CostInfo &MaxCost = LoopCost[Iter]; + for (BasicBlock *BB : L->getBlocks()) { + for (const Instruction &I : *BB) { + if (I.isDebugOrPseudoInst()) + continue; + // Compute the predicated and non-predicated cost of the instruction. + Scaled64 IPredCost = Scaled64::getZero(), + INonPredCost = Scaled64::getZero(); + + // Assume infinite resources that allow to fully exploit the available + // instruction-level parallelism. + // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) + for (const Use &U : I.operands()) { + auto UI = dyn_cast<Instruction>(U.get()); + if (!UI) + continue; + if (InstCostMap.count(UI)) { + IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost); + INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost); + } + } + auto ILatency = computeInstCost(&I); + if (!ILatency) { + OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); + ORmissL << "Invalid instruction cost preventing analysis and " + "optimization of the inner-most loop containing this " + "instruction. "; + ORE->emit(ORmissL); + return false; + } + IPredCost += Scaled64::get(ILatency.getValue()); + INonPredCost += Scaled64::get(ILatency.getValue()); + + // For a select that can be converted to branch, + // compute its cost as a branch (non-predicated cost). 
+ // + // BranchCost = PredictedPathCost + MispredictCost + // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb + // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate + if (SIset.contains(&I)) { + auto SI = dyn_cast<SelectInst>(&I); + + Scaled64 TrueOpCost = Scaled64::getZero(), + FalseOpCost = Scaled64::getZero(); + if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) + if (InstCostMap.count(TI)) + TrueOpCost = InstCostMap[TI].NonPredCost; + if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) + if (InstCostMap.count(FI)) + FalseOpCost = InstCostMap[FI].NonPredCost; + Scaled64 PredictedPathCost = + getPredictedPathCost(TrueOpCost, FalseOpCost, SI); + + Scaled64 CondCost = Scaled64::getZero(); + if (auto *CI = dyn_cast<Instruction>(SI->getCondition())) + if (InstCostMap.count(CI)) + CondCost = InstCostMap[CI].NonPredCost; + Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); + + INonPredCost = PredictedPathCost + MispredictCost; + } + + InstCostMap[&I] = {IPredCost, INonPredCost}; + MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); + MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); + } + } + } + return true; +} + +SmallPtrSet<const Instruction *, 2> +SelectOptimize::getSIset(const SelectGroups &SIGroups) { + SmallPtrSet<const Instruction *, 2> SIset; + for (const SelectGroup &ASI : SIGroups) + for (const SelectInst *SI : ASI) + SIset.insert(SI); + return SIset; +} + +Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { + InstructionCost ICost = + TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); + if (auto OC = ICost.getValue()) + return Optional<uint64_t>(*OC); + return Optional<uint64_t>(None); +} + +ScaledNumber<uint64_t> +SelectOptimize::getMispredictionCost(const SelectInst *SI, + const Scaled64 CondCost) { + uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; + + // Account for the default misprediction rate when using a branch + // (conservatively set to 25% by default). + uint64_t MispredictRate = MispredictDefaultRate; + // If the select condition is obviously predictable, then the misprediction + // rate is zero. + if (isSelectHighlyPredictable(SI)) + MispredictRate = 0; + + // CondCost is included to account for cases where the computation of the + // condition is part of a long dependence chain (potentially loop-carried) + // that would delay detection of a misprediction and increase its cost. + Scaled64 MispredictCost = + std::max(Scaled64::get(MispredictPenalty), CondCost) * + Scaled64::get(MispredictRate); + MispredictCost /= Scaled64::get(100); + + return MispredictCost; +} + +// Returns the cost of a branch when the prediction is correct. +// TrueCost * TrueProbability + FalseCost * FalseProbability. +ScaledNumber<uint64_t> +SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, + const SelectInst *SI) { + Scaled64 PredPathCost; + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t SumWeight = TrueWeight + FalseWeight; + if (SumWeight != 0) { + PredPathCost = TrueCost * Scaled64::get(TrueWeight) + + FalseCost * Scaled64::get(FalseWeight); + PredPathCost /= Scaled64::get(SumWeight); + return PredPathCost; + } + } + // Without branch weight metadata, we assume 75% for the one path and 25% for + // the other, and pick the result with the biggest cost. 
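+ // For example, TrueCost = 8 and FalseCost = 2 yield
+ // max(3 * 8 + 2, 3 * 2 + 8) / 4 = 26 / 4 = 6.5 cycles: the costlier
+ // operand is conservatively assumed to be on the likely path.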
+ PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, + FalseCost * Scaled64::get(3) + TrueCost); + PredPathCost /= Scaled64::get(4); + return PredPathCost; +} + +bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + if (VectorCond) + return false; + TargetLowering::SelectSupportKind SelectKind; + if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + return TLI->isSelectSupported(SelectKind); +} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ec297579090e..aa688d9dda3c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,7 +35,6 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -52,7 +51,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -426,6 +424,7 @@ namespace { SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); + SDValue visitAVG(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitMULO(SDNode *N); @@ -511,6 +510,7 @@ namespace { SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); + SDValue visitFP_TO_BF16(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); @@ -520,7 +520,9 @@ namespace { SDValue XformToShuffleWithZero(SDNode *N); bool reassociationCanBreakAddressingModePattern(unsigned Opc, - const SDLoc &DL, SDValue N0, + const SDLoc &DL, + SDNode *N, + SDValue N0, SDValue N1); SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1); @@ -570,6 +572,8 @@ namespace { SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildSREMPow2(SDNode *N); + SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N); SDValue BuildLogBase2(SDValue V, const SDLoc &DL); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); @@ -583,11 +587,11 @@ namespace { bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, + SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, - SDValue InnerPos, SDValue InnerNeg, + SDValue InnerPos, SDValue InnerNeg, bool HasPos, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); @@ -665,9 +669,8 @@ namespace { /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), /// and ConstNode is c2. 
- bool isMulAddWithConstProfitable(SDNode *MulNode, - SDValue &AddNode, - SDValue &ConstNode); + bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, + SDValue ConstNode); /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns @@ -880,8 +883,8 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) { // We provide an Offset so that we can create bitwidths that won't overflow. static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); - LHS = LHS.zextOrSelf(Bits); - RHS = RHS.zextOrSelf(Bits); + LHS = LHS.zext(Bits); + RHS = RHS.zext(Bits); } // Return true if this node is a setcc, or is a select_cc @@ -926,7 +929,7 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, /// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; - if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse()) return true; return false; } @@ -996,6 +999,7 @@ static bool canSplitIdx(LoadSDNode *LD) { bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, + SDNode *N, SDValue N0, SDValue N1) { // Currently this only tries to ensure we don't undo the GEP splits done by @@ -1004,33 +1008,62 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, // (load/store (add, (add, x, offset1), offset2)) -> // (load/store (add, x, offset1+offset2)). - if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) - return false; + // (load/store (add, (add, x, y), offset2)) -> + // (load/store (add, (add, x, offset2), y)). - if (N0.hasOneUse()) + if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD) return false; - auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N1); - if (!C1 || !C2) + if (!C2) return false; - const APInt &C1APIntVal = C1->getAPIntValue(); const APInt &C2APIntVal = C2->getAPIntValue(); - if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64) + if (C2APIntVal.getSignificantBits() > 64) return false; - const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; - if (CombinedValueIntVal.getBitWidth() > 64) - return false; - const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); - - for (SDNode *Node : N0->uses()) { - auto LoadStore = dyn_cast<MemSDNode>(Node); - if (LoadStore) { - // Is x[offset2] already not a legal addressing mode? If so then - // reassociating the constants breaks nothing (we test offset2 because - // that's the one we hope to fold into the load or store). + if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + if (N0.hasOneUse()) + return false; + + const APInt &C1APIntVal = C1->getAPIntValue(); + const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal; + if (CombinedValueIntVal.getSignificantBits() > 64) + return false; + const int64_t CombinedValue = CombinedValueIntVal.getSExtValue(); + + for (SDNode *Node : N->uses()) { + if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) { + // Is x[offset2] already not a legal addressing mode? If so then + // reassociating the constants breaks nothing (we test offset2 because + // that's the one we hope to fold into the load or store). 
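The guard above is easier to see with concrete numbers. A minimal standalone sketch, assuming an AArch64-like target that folds unsigned immediate offsets up to 4095 into a load; the limit and both helper names are invented for illustration:

#include <cassert>
#include <cstdint>

// Invented stand-in for TLI.isLegalAddressingMode: offsets in [0, 4095]
// fold into the memory operation, anything else needs a separate add.
static bool isLegalImmOffset(int64_t Off) { return Off >= 0 && Off <= 4095; }

// Mirrors the check above: folding (x + C1) + C2 into x + (C1 + C2) is
// only harmful when x[C2] was a legal mode but x[C1 + C2] no longer is.
static bool reassocBreaksAddressing(int64_t C1, int64_t C2) {
  if (!isLegalImmOffset(C2))
    return false; // x[C2] was never foldable, so nothing is lost.
  return !isLegalImmOffset(C1 + C2);
}

int main() {
  // (x + 4000) + 95  -> x + 4095: combined offset still folds, safe.
  // (x + 4000) + 100 -> x + 4100: combined offset no longer folds, reject.
  assert(!reassocBreaksAddressing(4000, 95));
  assert(reassocBreaksAddressing(4000, 100));
  return 0;
}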
+ TargetLoweringBase::AddrMode AM; + AM.HasBaseReg = true; + AM.BaseOffs = C2APIntVal.getSExtValue(); + EVT VT = LoadStore->getMemoryVT(); + unsigned AS = LoadStore->getAddressSpace(); + Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + continue; + + // Would x[offset1+offset2] still be a legal addressing mode? + AM.BaseOffs = CombinedValue; + if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) + return true; + } + } + } else { + if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1))) + if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA)) + return false; + + for (SDNode *Node : N->uses()) { + auto *LoadStore = dyn_cast<MemSDNode>(Node); + if (!LoadStore) + return false; + + // Is x[offset2] a legal addressing mode? If so then + // reassociating the constants breaks address pattern TargetLoweringBase::AddrMode AM; AM.HasBaseReg = true; AM.BaseOffs = C2APIntVal.getSExtValue(); @@ -1038,13 +1071,9 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, unsigned AS = LoadStore->getAddressSpace(); Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) - continue; - - // Would x[offset1+offset2] still be a legal addressing mode? - AM.BaseOffs = CombinedValue; - if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS)) - return true; + return false; } + return true; } return false; @@ -1072,11 +1101,51 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) - return DAG.getNode(Opc, DL, VT, OpNode, N01); - return SDValue(); + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1); + return DAG.getNode(Opc, DL, VT, OpNode, N01); + } + } + + // Check for repeated operand logic simplifications. 
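The repeated-operand identities that follow are easy to sanity-check exhaustively on small integers; a quick standalone verification in plain C++, not part of the patch:

#include <cassert>

int main() {
  for (unsigned a = 0; a < 16; ++a)
    for (unsigned b = 0; b < 16; ++b) {
      assert(((a & b) & a) == (a & b)); // AND absorbs a repeated operand
      assert(((a | b) | b) == (a | b)); // likewise for OR
      assert(((a ^ b) ^ a) == b);       // XOR cancels the repeated operand
      assert(((a ^ b) ^ b) == a);
    }
  return 0;
}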
+  if (Opc == ISD::AND || Opc == ISD::OR) {
+    // (N00 & N01) & N00 --> N00 & N01
+    // (N00 & N01) & N01 --> N00 & N01
+    // (N00 | N01) | N00 --> N00 | N01
+    // (N00 | N01) | N01 --> N00 | N01
+    if (N1 == N00 || N1 == N01)
+      return N0;
+  }
+  if (Opc == ISD::XOR) {
+    // (N00 ^ N01) ^ N00 --> N01
+    if (N1 == N00)
+      return N01;
+    // (N00 ^ N01) ^ N01 --> N00
+    if (N1 == N01)
+      return N00;
+  }
+
+  if (TLI.isReassocProfitable(DAG, N0, N1)) {
+    if (N1 != N01) {
+      // Reassociate if (op N00, N1) already exists
+      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
+        // If Op (Op N00, N1), N01 already exists, we need to stop
+        // reassociating to avoid an infinite combine loop.
+        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
+          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
+      }
+    }
+
+    if (N1 != N00) {
+      // Reassociate if (op N01, N1) already exists
+      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
+        // If Op (Op N01, N1), N00 already exists, we need to stop
+        // reassociating to avoid an infinite combine loop.
+        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
+          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
+      }
+    }
+  }
+
+  return SDValue();
 }
@@ -1103,7 +1172,7 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
   ++NodesCombined;
   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
-             To[0].getNode()->dump(&DAG);
+             To[0].dump(&DAG);
              dbgs() << " and " << NumTo - 1 << " other values\n");
   for (unsigned i = 0, e = NumTo; i != e; ++i)
     assert((!To[i].getNode() ||
@@ -1115,10 +1184,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
   if (AddTo) {
     // Push the new nodes and any users onto the worklist
     for (unsigned i = 0, e = NumTo; i != e; ++i) {
-      if (To[i].getNode()) {
-        AddToWorklist(To[i].getNode());
-        AddUsersToWorklist(To[i].getNode());
-      }
+      if (To[i].getNode())
+        AddToWorklistWithUsers(To[i].getNode());
     }
   }
@@ -1134,9 +1201,8 @@ void DAGCombiner::
 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
   // Replace the old value with the new one.
   ++NodesCombined;
-  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
-             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
-             dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
+             dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
 
   // Replace all uses. If any nodes become isomorphic to other nodes and
   // are deleted, make sure to remove them from our worklist.
@@ -1149,7 +1215,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
   // Finally, if the node is now dead, remove it from the graph. The node
   // may not be dead if the replacement process recursively simplified to
   // something else needing this node.
- if (TLO.Old.getNode()->use_empty()) + if (TLO.Old->use_empty()) deleteAndRecombine(TLO.Old.getNode()); } @@ -1196,7 +1262,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; - Trunc.getNode()->dump(&DAG); dbgs() << '\n'); + Trunc.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); @@ -1295,7 +1361,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace0 = false; SDValue N0 = Op.getOperand(0); @@ -1322,7 +1388,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { // If operands have a use ordering, make sure we deal with // predecessor first. - if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { + if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) { std::swap(N0, N1); std::swap(NN0, NN1); } @@ -1363,11 +1429,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); bool Replace = false; SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); if (Opc == ISD::SRA) N0 = SExtPromoteOperand(N0, PVT); else if (Opc == ISD::SRL) @@ -1379,6 +1444,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { return SDValue(); SDLoc DL(Op); + SDValue N1 = Op.getOperand(1); SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); @@ -1414,7 +1480,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) - LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG)); return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); @@ -1455,7 +1521,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; - Result.getNode()->dump(&DAG); dbgs() << '\n'); + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); @@ -1569,9 +1635,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... 
into: "; RV.dump(&DAG)); - if (N->getNumValues() == RV.getNode()->getNumValues()) + if (N->getNumValues() == RV->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { assert(N->getValueType(0) == RV.getValueType() && @@ -1635,6 +1701,10 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: return visitAVG(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: @@ -1724,6 +1794,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); case ISD::FREEZE: return visitFREEZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: @@ -2072,8 +2143,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, return false; VT = ST->getMemoryVT(); AS = ST->getAddressSpace(); - } else + } else { return false; + } TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { @@ -2094,8 +2166,9 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, else // [reg +/- reg] AM.Scale = 1; - } else + } else { return false; + } return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, VT.getTypeForEVT(*DAG.getContext()), AS); @@ -2139,6 +2212,18 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, return C->isExactlyValue(1.0); } } + if (ConstantSDNode *C = isConstOrConstSplat(V)) { + switch (Opcode) { + case ISD::ADD: // X + 0 --> X + case ISD::SUB: // X - 0 --> X + case ISD::SHL: // X << 0 --> X + case ISD::SRA: // X s>> 0 --> X + case ISD::SRL: // X u>> 0 --> X + return C->isZero(); + case ISD::MUL: // X * 1 --> X + return C->isOne(); + } + } return false; }; @@ -2316,6 +2401,15 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool isADDLike(SDValue V, const SelectionDAG &DAG) { + unsigned Opcode = V.getOpcode(); + if (Opcode == ISD::OR) + return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)); + if (Opcode == ISD::XOR) + return isMinSignedConstant(V.getOperand(1)); + return false; +} + /// Try to fold a node that behaves like an ADD (note that N isn't necessarily /// an ISD::ADD here, it could for example be an ISD::OR if we know that there /// are no common bits set in the operands). 
@@ -2354,66 +2448,60 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { if (isNullConstant(N1)) return N0; - if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { + if (N0.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // fold ((A-c1)+c2) -> (A+(c2-c1)) - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { - SDValue Sub = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)}); - assert(Sub && "Constant folding failed"); + if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); - } // fold ((c1-A)+c2) -> (c1+c2)-A - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - SDValue Add = - DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)}); - assert(Add && "Constant folding failed"); + if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00})) return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); - } + } - // add (sext i1 X), 1 -> zext (not i1 X) - // We don't transform this pattern: - // add (zext i1 X), -1 -> sext (not i1 X) - // because most (?) targets generate better code for the zext form. - if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && - isOneOrOneSplat(N1)) { - SDValue X = N0.getOperand(0); - if ((!LegalOperations || - (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && - TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && - X.getScalarValueSizeInBits() == 1) { - SDValue Not = DAG.getNOT(DL, X, X.getValueType()); - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); - } + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneOrOneSplat(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } - // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is - // equivalent to (add x, c0). - if (N0.getOpcode() == ISD::OR && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && - DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { - if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, - {N1, N0.getOperand(1)})) - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); - } + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) + // iff (or x, c0) is equivalent to (add x, c0). + // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1)) + // iff (xor x, c0) is equivalent to (add x, c0). 
+ if (isADDLike(N0, DAG)) { + SDValue N01 = N0.getOperand(1); + if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add); } if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // reassociate add - if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) { + if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) { if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags())) return RADD; // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is // equivalent to (add x, c). + // Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is + // equivalent to (add x, c). auto ReassociateAddOr = [&](SDValue N0, SDValue N1) { - if (N0.getOpcode() == ISD::OR && N0.hasOneUse() && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && - DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + if (isADDLike(N0, DAG) && N0.hasOneUse() && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { return DAG.getNode(ISD::ADD, DL, VT, DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), N0.getOperand(1)); @@ -2473,7 +2561,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant - if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB && + N0->hasOneUse() && N1->hasOneUse()) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); SDValue N10 = N1.getOperand(0); @@ -2526,8 +2615,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // add (add x, y), 1 // And if the target does not like this form then turn into: // sub y, (xor x, -1) - if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && - N0.getOpcode() == ISD::ADD) { + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && + N0.hasOneUse()) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not); @@ -2535,7 +2624,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { } // (x - y) + -1 -> add (xor y, -1), x - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isAllOnesOrAllOnesSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); @@ -2632,7 +2721,8 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { // fold vector ops if (VT.isVector()) { - // TODO SimplifyVBinOp + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // fold (add_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) @@ -2678,7 +2768,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) return SDValue(); - EVT VT = V.getNode()->getValueType(0); + EVT VT = V->getValueType(0); if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT)) return SDValue(); @@ -2731,27 +2821,27 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, // add (add x, 1), y // And if the target does not like this form then turn into: // sub y, (xor x, -1) - if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() && - N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) { + if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD && + N0.hasOneUse() && 
isOneOrOneSplat(N0.getOperand(1))) { SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::SUB, DL, VT, N1, Not); } - // Hoist one-use subtraction by non-opaque constant: - // (x - C) + y -> (x + y) - C - // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); - } - // Hoist one-use subtraction from non-opaque constant: - // (C - x) + y -> (y - x) + C - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) { + // Hoist one-use subtraction by non-opaque constant: + // (x - C) + y -> (x + y) - C + // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. + if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); + } + // Hoist one-use subtraction from non-opaque constant: + // (C - x) + y -> (y - x) + C + if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0)); + } } // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1' @@ -3127,21 +3217,26 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with // a single path for carry/borrow out propagation: static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, - SDValue Carry0, SDValue Carry1, SDNode *N) { - if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) + SDValue N0, SDValue N1, SDNode *N) { + SDValue Carry0 = getAsCarry(TLI, N0); + if (!Carry0) return SDValue(); + SDValue Carry1 = getAsCarry(TLI, N1); + if (!Carry1) + return SDValue(); + unsigned Opcode = Carry0.getOpcode(); if (Opcode != Carry1.getOpcode()) return SDValue(); if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) return SDValue(); - // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the - // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in - // the above ASCII art.) - if (Carry1.getOperand(0) != Carry0.getValue(0) && - Carry1.getOperand(1) != Carry0.getValue(0)) + // Canonicalize the add/sub of A and B (the top node in the above ASCII art) + // as Carry0 and the add/sub of the carry in as Carry1 (the middle node). + if (Carry1.getNode()->isOperandOf(Carry0.getNode())) std::swap(Carry0, Carry1); + + // Check if nodes are connected in expected way. if (Carry1.getOperand(0) != Carry0.getValue(0) && Carry1.getOperand(1) != Carry0.getValue(0)) return SDValue(); @@ -3321,9 +3416,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { EVT VT = N0.getValueType(); SDLoc DL(N); + auto PeekThroughFreeze = [](SDValue N) { + if (N->getOpcode() == ISD::FREEZE && N.hasOneUse()) + return N->getOperand(0); + return N; + }; + // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. 
- if (N0 == N1) + if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1)) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); // fold (sub c1, c2) -> c3 @@ -3381,7 +3482,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Convert 0 - abs(x). - if (N1->getOpcode() == ISD::ABS && + if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() && !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) return Result; @@ -3419,44 +3520,31 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0.getOperand(0); // fold (A+C1)-C2 -> A+(C1-C2) - if (N0.getOpcode() == ISD::ADD && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); + if (N0.getOpcode() == ISD::ADD) { + SDValue N01 = N0.getOperand(1); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); } // fold C2-(A+C1) -> (C2-C1)-A if (N1.getOpcode() == ISD::ADD) { SDValue N11 = N1.getOperand(1); - if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && - isConstantOrConstantVector(N11, /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}); - assert(NewC && "Constant folding failed"); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11})) return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); - } } // fold (A-C1)-C2 -> A-(C1+C2) - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); + if (N0.getOpcode() == ISD::SUB) { + SDValue N01 = N0.getOperand(1); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1})) + return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); } // fold (c1-A)-c2 -> (c1-c2)-A - if (N0.getOpcode() == ISD::SUB && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { - SDValue NewC = - DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1}); - assert(NewC && "Constant folding failed"); - return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); + if (N0.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1})) + return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -3651,6 +3739,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // As with the previous fold, prefer add for more folding potential. 
+ // Subtracting SMIN/0 is the same as adding SMIN/0: + // N0 - (X << BW-1) --> N0 + (X << BW-1) + if (N1.getOpcode() == ISD::SHL) { + ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1)); + if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1) + return DAG.getNode(ISD::ADD, DL, VT, N1, N0); + } + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) if (SDValue Carry = getAsCarry(TLI, N0)) { @@ -3686,7 +3783,8 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { // fold vector ops if (VT.isVector()) { - // TODO SimplifyVBinOp + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; // fold (sub_sat x, 0) -> x, vector edition if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) @@ -3837,19 +3935,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold (mul x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // fold (mul c1, c2) -> c1*c2 - if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + return DAG.getNode(ISD::MUL, DL, VT, N1, N0); bool N1IsConst = false; bool N1IsOpaqueConst = false; @@ -3857,7 +3956,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3884,17 +3983,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return NewSel; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnes()) { - SDLoc DL(N); + if (N1IsConst && ConstValue1.isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { - SDLoc DL(N); SDValue LogBase2 = BuildLogBase2(N1, DL); EVT ShiftVT = getShiftAmountTy(N0.getValueType()); SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); @@ -3904,7 +4000,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); - SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. 
return DAG.getNode(ISD::SUB, DL, VT, @@ -3949,7 +4044,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { ShAmt += TZeros; assert(ShAmt < VT.getScalarSizeInBits() && "multiply-by-constant generated out of bounds shift"); - SDLoc DL(N); SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT)); SDValue R = @@ -3964,12 +4058,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N1, /* NoOpaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - if (isConstantOrConstantVector(C3)) - return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); + if (N0.getOpcode() == ISD::SHL) { + SDValue N01 = N0.getOperand(1); + if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01})) + return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one @@ -3979,18 +4071,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). if (N0.getOpcode() == ISD::SHL && - isConstantOrConstantVector(N0.getOperand(1)) && - N0.getNode()->hasOneUse()) { + isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && isConstantOrConstantVector(N1.getOperand(1)) && - N1.getNode()->hasOneUse()) { + N1->hasOneUse()) { Sh = N1; Y = N0; } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1)); } } @@ -3999,18 +4090,17 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getOpcode() == ISD::ADD && DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + return DAG.getNode( + ISD::ADD, DL, VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). if (N0.getOpcode() == ISD::VSCALE) if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { const APInt &C0 = N0.getConstantOperandAPInt(0); const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, C0 * C1); + return DAG.getVScale(DL, VT, C0 * C1); } // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). 
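The (mul (shl X, C1), C2) -> (mul X, C2 << C1) rewrite above relies on a left shift being a multiplication by a power of two, so the two constants can be pre-combined. A quick standalone check in plain C++, with arbitrarily chosen constants:

#include <cassert>
#include <cstdint>

int main() {
  // (x << 3) * 5 == x * (5 << 3) == x * 40, including wraparound cases.
  for (uint32_t x = 0; x < 100000; x += 7)
    assert(((x << 3) * 5u) == (x * (5u << 3)));
  return 0;
}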
@@ -4019,7 +4109,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { const APInt &C0 = N0.getConstantOperandAPInt(0); APInt NewStep = C0 * MulVal; - return DAG.getStepVector(SDLoc(N), VT, NewStep); + return DAG.getStepVector(DL, VT, NewStep); } // Fold ((mul x, 0/undef) -> 0, @@ -4041,7 +4131,6 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) && ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) { assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector"); - SDLoc DL(N); EVT LegalSVT = N1.getOperand(0).getValueType(); SDValue Zero = DAG.getConstant(0, DL, LegalSVT); SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT); @@ -4054,7 +4143,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // reassociate mul - if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) + if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; return SDValue(); @@ -4117,7 +4206,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode *User : Op0.getNode()->uses()) { + for (SDNode *User : Op0->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -4257,12 +4346,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { - SDLoc DL(N); - EVT VT = N->getValueType(0); - EVT CCVT = getSetCCResultType(VT); - unsigned BitWidth = VT.getScalarSizeInBits(); - +static bool isDivisorPowerOfTwo(SDValue Divisor) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. auto IsPowerOfTwo = [](ConstantSDNode *C) { @@ -4275,11 +4359,20 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { return false; }; + return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo); +} + +SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT CCVT = getSetCCResultType(VT); + unsigned BitWidth = VT.getScalarSizeInBits(); + // fold (sdiv X, pow2) -> simple ops after legalize // FIXME: We check for the exact bit here because the generic lowering gives // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. - if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) { + if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -4435,6 +4528,16 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { return SDValue(); } +SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) { + if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) && + !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) { + // Target-specific implementation of srem x, pow2. 
+ if (SDValue Res = BuildSREMPow2(N)) + return Res; + } + return SDValue(); +} + // handles ISD::SREM and ISD::UREM SDValue DAGCombiner::visitREM(SDNode *N) { unsigned Opcode = N->getOpcode(); @@ -4451,10 +4554,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; - // fold (urem X, -1) -> select(X == -1, 0, x) - if (!isSigned && N1C && N1C->isAllOnes()) - return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), - DAG.getConstant(0, DL, VT), N0); + // fold (urem X, -1) -> select(FX == -1, 0, FX) + // Freeze the numerator to avoid a miscompile with an undefined value. + if (!isSigned && N1C && N1C->isAllOnes()) { + SDValue F0 = DAG.getFreeze(N0); + SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); + return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); + } if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -4495,6 +4601,12 @@ SDValue DAGCombiner::visitREM(SDNode *N) { // combine will not return a DIVREM. Regardless, checking cheapness here // makes sense since the simplification results in fatter code. if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { + if (isSigned) { + // check if we can build faster implementation for srem + if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N)) + return OptimizedRem; + } + SDValue OptimizedDiv = isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) { @@ -4654,6 +4766,46 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitAVG(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // fold (avg c1, c2) + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0); + + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) + return FoldedVOp; + + // fold (avgfloor x, 0) -> x >> 1 + if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) { + if (Opcode == ISD::AVGFLOORS) + return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT)); + if (Opcode == ISD::AVGFLOORU) + return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT)); + } + } + + // fold (avg x, undef) -> x + if (N0.isUndef()) + return N1; + if (N1.isUndef()) + return N0; + + // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1 + + return SDValue(); +} + /// Perform optimizations common to nodes that compute two values. LoOp and HiOp /// give the opcodes for the two computations that are being performed. Return /// true if a simplification was made. @@ -4812,7 +4964,9 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); // (mulo x, 2) -> (addo x, x) - if (N1C && N1C->getAPIntValue() == 2) + // FIXME: This needs a freeze. + if (N1C && N1C->getAPIntValue() == 2 && + (!IsSigned || VT.getScalarSizeInBits() > 2)) return DAG.getNode(IsSigned ? 
ISD::SADDO : ISD::UADDO, DL, N->getVTList(), N0, N0); @@ -4869,8 +5023,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, return 0; const APInt &C1 = N1C->getAPIntValue(); const APInt &C2 = N3C->getAPIntValue(); - if (C1.getBitWidth() < C2.getBitWidth() || - C1 != C2.sextOrSelf(C1.getBitWidth())) + if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth())) return 0; return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); }; @@ -4977,7 +5130,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, const APInt &C1 = N1C->getAPIntValue(); const APInt &C3 = N3C->getAPIntValue(); if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() || - C1 != C3.zextOrSelf(C1.getBitWidth())) + C1 != C3.zext(C1.getBitWidth())) return SDValue(); unsigned BW = (C1 + 1).exactLogBase2(); @@ -5007,6 +5160,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; + // If the operands are the same, this is a no-op. + if (N0 == N1) + return N0; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) @@ -5312,29 +5469,27 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, } // Turn compare of constants whose difference is 1 bit into add+and+setcc. - // TODO - support non-uniform vector amounts. if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) { // Match a shared variable operand and 2 non-opaque constant operands. - ConstantSDNode *C0 = isConstOrConstSplat(LR); - ConstantSDNode *C1 = isConstOrConstSplat(RR); - if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) { + auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) { + // The difference of the constants must be a single bit. const APInt &CMax = APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue()); const APInt &CMin = APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue()); - // The difference of the constants must be a single bit. 
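Concretely, with CMin = 5 and CMax = 7 (difference 2, a single bit) the combined compare "X != 5 && X != 7" becomes one subtract, one mask, and one compare against zero. A standalone check over a range of values, with the constants invented for the example:

#include <cassert>
#include <cstdint>

int main() {
  // and (setcc X, 7, ne), (setcc X, 5, ne)
  //   --> setcc ((X - 5) & ~(7 - 5)), 0, ne
  for (uint32_t X = 0; X < 1000; ++X)
    assert(((X != 5) && (X != 7)) == (((X - 5) & ~2u) != 0));
  return 0;
}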
- if ((CMax - CMin).isPowerOf2()) { - // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> - // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq - SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); - SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); - SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); - SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); - SDValue Mask = DAG.getNOT(DL, Diff, OpVT); - SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); - SDValue Zero = DAG.getConstant(0, DL, OpVT); - return DAG.getSetCC(DL, VT, And, Zero, CC0); - } + return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2(); + }; + if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) { + // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) --> + // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq + SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR); + SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR); + SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min); + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min); + SDValue Mask = DAG.getNOT(DL, Diff, OpVT); + SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, And, Zero, CC0); } } } @@ -5836,6 +5991,9 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { if (ShiftAmt.uge(VTBitWidth)) return SDValue(); + if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1))) + return SDValue(); + // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 SDLoc DL(And); @@ -5882,6 +6040,53 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); } +/// Given a bitwise logic operation N with a matching bitwise logic operand, +/// fold a pattern where 2 of the source operands are identically shifted +/// values. For example: +/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z +static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, + SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR) + && "Expected bitwise logic operation"); + + if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse()) + return SDValue(); + + // Match another bitwise logic op and a shift. + unsigned ShiftOpcode = ShiftOp.getOpcode(); + if (LogicOp.getOpcode() != LogicOpcode || + !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL || + ShiftOpcode == ISD::SRA)) + return SDValue(); + + // Match another shift op inside the first logic operand. Handle both commuted + // possibilities. 
+ // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z + SDValue X1 = ShiftOp.getOperand(0); + SDValue Y = ShiftOp.getOperand(1); + SDValue X0, Z; + if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode && + LogicOp.getOperand(0).getOperand(1) == Y) { + X0 = LogicOp.getOperand(0).getOperand(0); + Z = LogicOp.getOperand(1); + } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode && + LogicOp.getOperand(1).getOperand(1) == Y) { + X0 = LogicOp.getOperand(1).getOperand(0); + Z = LogicOp.getOperand(0); + } else { + return SDValue(); + } + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1); + SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y); + return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5915,27 +6120,25 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) return N0; - // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load + // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); - auto *BVec = dyn_cast<BuildVectorSDNode>(N1); - if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD && - N0.hasOneUse() && N1.hasOneUse()) { + ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true); + if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && + Splat && N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { // For this AND to be a zero extension of the masked load the elements // of the BuildVec must mask the bottom bits of the extended element // type - if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) { - uint64_t ElementSize = - LoadVT.getVectorElementType().getScalarSizeInBits(); - if (Splat->getAPIntValue().isMask(ElementSize)) { - return DAG.getMaskedLoad( - ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), - MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), - LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), - ISD::ZEXTLOAD, MLoad->isExpandingLoad()); - } + uint64_t ElementSize = + LoadVT.getVectorElementType().getScalarSizeInBits(); + if (Splat->getAPIntValue().isMask(ElementSize)) { + return DAG.getMaskedLoad( + ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), + MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), + LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), + ISD::ZEXTLOAD, MLoad->isExpandingLoad()); } } } @@ -6011,7 +6214,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. 
APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; @@ -6151,6 +6354,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) @@ -6209,9 +6417,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) return Shifts; - if (TLI.hasBitTest(N0, N1)) - if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) - return V; + if (SDValue V = combineShiftAnd1ToBitTest(N, DAG)) + return V; // Recognize the following pattern: // @@ -6261,11 +6468,11 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) - std::swap(N0, N1); + std::swap(N0, N1); if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) - std::swap(N0, N1); + std::swap(N0, N1); if (N0.getOpcode() == ISD::AND) { - if (!N0.getNode()->hasOneUse()) + if (!N0->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); // Also handle 0xffff since the LHS is guaranteed to have zeros there. @@ -6278,7 +6485,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, } if (N1.getOpcode() == ISD::AND) { - if (!N1.getNode()->hasOneUse()) + if (!N1->hasOneUse()) return SDValue(); ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); if (!N11C || N11C->getZExtValue() != 0xFF) @@ -6291,7 +6498,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); - if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse()) + if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -6304,7 +6511,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) SDValue N00 = N0->getOperand(0); if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { - if (!N00.getNode()->hasOneUse()) + if (!N00->hasOneUse()) return SDValue(); ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); if (!N001C || N001C->getZExtValue() != 0xFF) @@ -6315,7 +6522,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, SDValue N10 = N1->getOperand(0); if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { - if (!N10.getNode()->hasOneUse()) + if (!N10->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); // Also allow 0xFFFF since the bits will be shifted out. This is needed @@ -6333,19 +6540,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, // Make sure everything beyond the low halfword gets set to zero since the SRL // 16 will clear the top bits. 
unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16) { + if (OpSizeInBits > 16) { // If the left-shift isn't masked out then the only way this is a bswap is // if all bits beyond the low 8 are 0. In that case the entire pattern // reduces to a left shift anyway: leave it for other parts of the combiner. - if (!LookPassAnd0) + if (DemandHighBits && !LookPassAnd0) return SDValue(); // However, if the right shift isn't masked out then it might be because - // it's not needed. See if we can spot that too. - if (!LookPassAnd1 && - !DAG.MaskedValueIsZero( - N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) - return SDValue(); + // it's not needed. See if we can spot that too. If the high bits aren't + // demanded, we only need bits 23:16 to be zero. Otherwise, we need all + // upper bits to be zero. + if (!LookPassAnd1) { + unsigned HighBit = DemandHighBits ? OpSizeInBits : 24; + if (!DAG.MaskedValueIsZero(N10, + APInt::getBitsSet(OpSizeInBits, 16, HighBit))) + return SDValue(); + } } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); @@ -6365,7 +6576,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, /// ((x & 0x00ff0000) << 8) | /// ((x & 0xff000000) >> 8) static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { - if (!N.getNode()->hasOneUse()) + if (!N->hasOneUse()) return false; unsigned Opc = N.getOpcode(); @@ -6552,8 +6763,9 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) && !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts))) return SDValue(); - } else + } else { return SDValue(); + } // Make sure the parts are all coming from the same node. if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) @@ -6591,7 +6803,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + (N0->hasOneUse() || N1->hasOneUse())) { // We can only do this xform if we know that bits from X that are set in C2 // but not in C1 are already zero. Likewise for Y. if (const ConstantSDNode *N0O1C = @@ -6619,7 +6831,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { N1.getOpcode() == ISD::AND && N0.getOperand(0) == N1.getOperand(0) && // Don't increase # computations. - (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + (N0->hasOneUse() || N1->hasOneUse())) { SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(1), N1.getOperand(1)); return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X); @@ -6634,14 +6846,38 @@ static SDValue visitORCommutative( EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) - if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1) + // TODO: Set AllowUndefs = true. 
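This absorption identity holds for all bit patterns, since OR-ing Y back in restores exactly the bits the mask cleared; an exhaustive check on small values, independent of the patch:

#include <cassert>

int main() {
  for (unsigned X = 0; X < 16; ++X)
    for (unsigned Y = 0; Y < 16; ++Y)
      assert(((X & ~Y) | Y) == (X | Y)); // the mask is absorbed by the OR
  return 0;
}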
+ if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0), + /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) - if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1) + if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1), + /* AllowUndefs */ false) == N1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); } + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + + auto peekThroughZext = [](SDValue V) { + if (V->getOpcode() == ISD::ZERO_EXTEND) + return V->getOperand(0); + return V; + }; + + // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y + if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL && + N0.getOperand(0) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; + + // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y + if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL && + N0.getOperand(1) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; + return SDValue(); } @@ -6678,11 +6914,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) - // Do this only if the resulting shuffle is legal. - if (isa<ShuffleVectorSDNode>(N0) && - isa<ShuffleVectorSDNode>(N1) && - // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT)) { + // Do this only if the resulting type / shuffle is legal. + auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0); + auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1); + if (SV0 && SV1 && TLI.isTypeLegal(VT)) { bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); @@ -6691,11 +6926,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) { assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); bool CanFold = true; int NumElts = VT.getVectorNumElements(); - SmallVector<int, 4> Mask(NumElts); + SmallVector<int, 4> Mask(NumElts, -1); for (int i = 0; i != NumElts; ++i) { int M0 = SV0->getMaskElt(i); @@ -6707,10 +6940,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // If one element is zero and the otherside is undef, keep undef. // This also handles the case that both are undef. - if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { - Mask[i] = -1; + if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) continue; - } // Make sure only one of the elements is zero. if (M0Zero == M1Zero) { @@ -6778,7 +7009,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) { return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue()); }; - if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + if (N0.getOpcode() == ISD::AND && N0->hasOneUse() && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, {N1, N0.getOperand(1)})) { @@ -7098,8 +7329,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // Neg with outer conversions stripped away. 
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, const SDLoc &DL) { + SDValue InnerNeg, bool HasPos, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) @@ -7110,7 +7342,6 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, EVT VT = Shifted.getValueType(); if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG, /*IsRotate*/ true)) { - bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg); } @@ -7126,8 +7357,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // TODO: Merge with MatchRotatePosNeg. SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, SDValue InnerPos, - SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, const SDLoc &DL) { + SDValue InnerNeg, bool HasPos, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL) { EVT VT = N0.getValueType(); unsigned EltBits = VT.getScalarSizeInBits(); @@ -7139,7 +7371,6 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // (srl x1, (*ext y))) -> // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) { - bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, HasPos ? Pos : Neg); } @@ -7201,6 +7432,16 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); + + // If the type is going to be promoted and the target has enabled custom + // lowering for rotate, allow matching rotate by non-constants. Only allow + // this for scalar types. + if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) == + TargetLowering::TypePromoteInteger) { + HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom; + HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom; + } + if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); @@ -7254,11 +7495,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. - // TODO: Support pre-legalization funnel-shift by constant. - bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); - if (!IsRotate && !(HasFSHL || HasFSHR)) - return SDValue(); // Requires funnel shift support. - // Canonicalize shl to left side in a shl/srl pair. 
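The HasPos plumbing added above exists because a rotate in one direction can always be rewritten as a rotate in the other, so whichever of ROTL/ROTR is actually legal gets emitted. The underlying relation, checked standalone (illustrative C++, not LLVM code; rotl32/rotr32 are hand-rolled helpers):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned r) {
  return (x << (r & 31)) | (x >> ((32 - r) & 31));
}
static uint32_t rotr32(uint32_t x, unsigned r) {
  return (x >> (r & 31)) | (x << ((32 - r) & 31));
}

int main() {
  // rotl(x, y) == rotr(x, (bitwidth - y) mod bitwidth)
  uint32_t x = 0xC0FFEE42u;
  for (unsigned y = 0; y < 32; ++y)
    assert(rotl32(x, y) == rotr32(x, (32 - y) & 31));
  return 0;
}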
if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); @@ -7272,27 +7508,12 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); - // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) - // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) - // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) - // iff C1+C2 == EltSizeInBits auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; - if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Res; - if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { - bool UseROTL = !LegalOperations || HasROTL; - Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - UseROTL ? LHSShiftAmt : RHSShiftAmt); - } else { - bool UseFSHL = !LegalOperations || HasFSHL; - Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, - RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); - } + auto ApplyMasks = [&](SDValue Res) { // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); @@ -7313,6 +7534,71 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } return Res; + }; + + // TODO: Support pre-legalization funnel-shift by constant. + bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + if (!IsRotate && !(HasFSHL || HasFSHR)) { + if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() && + ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + // Look for a disguised rotate by constant. + // The common shifted operand X may be hidden inside another 'or'. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) { + if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR) + return false; + if (CommonOp == Or.getOperand(0)) { + X = CommonOp; + Y = Or.getOperand(1); + return true; + } + if (CommonOp == Or.getOperand(1)) { + X = CommonOp; + Y = Or.getOperand(0); + return true; + } + return false; + }; + + SDValue Res; + if (matchOr(LHSShiftArg, RHSShiftArg)) { + // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1) + SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); + SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt); + Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY); + } else if (matchOr(RHSShiftArg, LHSShiftArg)) { + // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2) + SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt); + Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY); + } else { + return SDValue(); + } + + return ApplyMasks(Res); + } + + return SDValue(); // Requires funnel shift support. + } + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) + // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) + // iff C1+C2 == EltSizeInBits + if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + SDValue Res; + if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { + bool UseROTL = !LegalOperations || HasROTL; + Res = DAG.getNode(UseROTL ? 
ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + UseROTL ? LHSShiftAmt : RHSShiftAmt); + } else { + bool UseFSHL = !LegalOperations || HasFSHL; + Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); + } + + return ApplyMasks(Res); } // Even pre-legalization, we can't easily rotate/funnel-shift by a variable @@ -7343,26 +7629,26 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (IsRotate && (HasROTL || HasROTR)) { SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, - RExtOp0, ISD::ROTL, ISD::ROTR, DL); + RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL); if (TryL) return TryL; SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, - LExtOp0, ISD::ROTR, ISD::ROTL, DL); + LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL); if (TryR) return TryR; } SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, - LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL); + LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL); if (TryL) return TryL; SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, - RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL); + RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL); if (TryR) return TryR; @@ -7877,7 +8163,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // little endian value load Optional<bool> IsBigEndian = isBigEndian( makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); - if (!IsBigEndian.hasValue()) + if (!IsBigEndian) return SDValue(); assert(FirstByteProvider && "must be set"); @@ -8084,6 +8370,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; + // look for 'add-like' folds: + // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) + if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && + isMinSignedConstant(N1)) + if (SDValue Combined = visitADDLike(N)) + return Combined; + // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; @@ -8249,6 +8542,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue V = hoistLogicOpWithSameOpcodeHands(N)) return V; + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) + return R; + if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) + return R; + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) return MM; @@ -8479,7 +8777,9 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } unsigned NextOp = N0.getOpcode(); - // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + + // fold (rot* (rot* x, c2), c1) + // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); @@ -8487,14 +8787,19 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { EVT ShiftVT = C1->getValueType(0); bool SameSide = (N->getOpcode() == NextOp); unsigned CombineOp = SameSide ? 
ISD::ADD : ISD::SUB; - if (SDValue CombinedShift = DAG.FoldConstantArithmetic( - CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) { - SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); - SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( - ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC}); - return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), - CombinedShiftNorm); - } + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); + SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, + {N1, BitsizeC}); + SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, + {N0.getOperand(1), BitsizeC}); + if (Norm1 && Norm2) + if (SDValue CombinedShift = DAG.FoldConstantArithmetic( + CombineOp, dl, ShiftVT, {Norm1, Norm2})) { + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}); + return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), + CombinedShiftNorm); + } } } return SDValue(); @@ -8654,52 +8959,63 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } } - // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 - // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && - N0->getFlags().hasExact()) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - uint64_t C1 = N0C1->getZExtValue(); - uint64_t C2 = N1C->getZExtValue(); - SDLoc DL(N); - if (C1 <= C2) - return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(C2 - C1, DL, ShiftVT)); - return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), - DAG.getConstant(C1 - C2, DL, ShiftVT)); + if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) { + auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + const APInt &LHSC = LHS->getAPIntValue(); + const APInt &RHSC = RHS->getAPIntValue(); + return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && + LHSC.getZExtValue() <= RHSC.getZExtValue(); + }; + + SDLoc DL(N); + + // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 + // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2 + if (N0->getFlags().hasExact()) { + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + } + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff); + } } - } - // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or - // (and (srl x, (sub c1, c2), MASK) - // Only fold this if the inner shift has no other uses -- if it does, folding - // this will increase the total number of instructions. - // TODO - drop hasOneUse requirement if c1 == c2? - // TODO - support non-uniform vector shift amounts. 
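The shift-pair-to-mask rewrite that this hunk generalizes can be exercised exhaustively on a 32-bit scalar. A self-contained check of the identity, assuming both shift amounts are in range (which is what MatchShiftAmount enforces):

#include <cassert>
#include <cstdint>

int main() {
  // (shl (srl x, c1), c2) == shift x by |c2 - c1|, then clear the low
  // c2 bits; the DAG form materializes that clearing as an AND mask.
  for (uint32_t x : {0xFFu, 0xFFFFu, 0xDEADBEEFu})
    for (unsigned c1 = 0; c1 < 32; ++c1)
      for (unsigned c2 = 0; c2 < 32; ++c2) {
        uint32_t lhs = (x >> c1) << c2;
        uint32_t rhs =
            (c1 <= c2 ? x << (c2 - c1) : x >> (c1 - c2)) & (~0u << c2);
        assert(lhs == rhs);
      }
  return 0;
}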
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - if (N0C1->getAPIntValue().ult(OpSizeInBits)) { - uint64_t c1 = N0C1->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); - SDValue Shift; - if (c2 > c1) { - Mask <<= c2 - c1; - SDLoc DL(N); - Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(c2 - c1, DL, ShiftVT)); - } else { - Mask.lshrInPlace(c1 - c2); - SDLoc DL(N); - Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(c1 - c2, DL, ShiftVT)); - } - SDLoc DL(N0); - return DAG.getNode(ISD::AND, DL, VT, Shift, - DAG.getConstant(Mask, DL, VT)); + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or + // (and (srl x, (sub c1, c2), MASK) + // Only fold this if the inner shift has no other uses -- if it does, + // folding this will increase the total number of instructions. + if (N0.getOpcode() == ISD::SRL && + (N0.getOperand(1) == N1 || N0.hasOneUse()) && + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } } } @@ -8718,7 +9034,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. 
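The commute-with-shift fold coming up next relies on left shifts distributing over wrap-around addition, i.e. multiplication by a power of two distributing over modular add. A small standalone demonstration (the constant 0x1234 is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // fold (shl (add x, c1), c2) -> (add (shl x, c2), (shl c1, c2))
  const uint32_t c1 = 0x1234u;
  for (uint32_t x : {0u, 7u, 0xFFFFFFFFu})
    for (unsigned c2 = 0; c2 < 32; ++c2)
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
  return 0;
}

For the ISD::OR form of the same fold the combiner additionally needs the two operands to have no common set bits, so that the or genuinely behaves as an add.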
if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && - N0.getNode()->hasOneUse() && + N0->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && TLI.isDesirableToCommuteWithShift(N, Level)) { @@ -8730,14 +9046,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) - if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() && - isConstantOrConstantVector(N1, /* No Opaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); - if (isConstantOrConstantVector(Shl)) + if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) { + SDValue N01 = N0.getOperand(1); + if (SDValue Shl = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); } + ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && !N1C->isOpaque()) if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; @@ -9023,8 +9339,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits; if (LargeShift->getAPIntValue() == TruncBits) { SDLoc DL(N); - SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL, - getShiftAmountTy(LargeVT)); + EVT LargeShiftVT = getShiftAmountTy(LargeVT); + SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT); + Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt, + DAG.getConstant(TruncBits, DL, LargeShiftVT)); SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); @@ -9063,6 +9381,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return V; EVT VT = N0.getValueType(); + EVT ShiftVT = N1.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 @@ -9104,7 +9423,6 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { }; if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); - EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } @@ -9148,15 +9466,41 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (shl x, c), c) -> (and x, cst2) - // TODO - (srl (shl x, c1), c2). 
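The equal-amount case being removed here (and re-derived more generally below) is the classic clear-the-top-bits idiom, checked standalone:

#include <cassert>
#include <cstdint>

int main() {
  // fold (srl (shl x, c), c) -> (and x, (srl -1, c)):
  // shifting left then right by the same amount just clears the top c bits.
  for (uint32_t x : {0x12345678u, 0xFFFFFFFFu, 0x80000001u})
    for (unsigned c = 0; c < 32; ++c)
      assert(((x << c) >> c) == (x & (~0u >> c)));
  return 0;
}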
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && - isConstantOrConstantVector(N1, /* NoOpaques */ true)) { - SDLoc DL(N); - SDValue Mask = - DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1); - AddToWorklist(Mask.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); + // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or + // (and (srl x, (sub c2, c1), MASK) + if (N0.getOpcode() == ISD::SHL && + (N0.getOperand(1) == N1 || N0->hasOneUse()) && + TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + const APInt &LHSC = LHS->getAPIntValue(); + const APInt &RHSC = RHS->getAPIntValue(); + return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && + LHSC.getZExtValue() <= RHSC.getZExtValue(); + }; + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDLoc DL(N); + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } } // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) @@ -9412,6 +9756,21 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1})) return C; + ConstantSDNode *N1C = isConstOrConstSplat(N1); + + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) { + // fold (sshlsat x, c) -> (shl x, c) + if (N->getOpcode() == ISD::SSHLSAT && N1C && + N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0))) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + + // fold (ushlsat x, c) -> (shl x, c) + if (N->getOpcode() == ISD::USHLSAT && N1C && + N1C->getAPIntValue().ule( + DAG.computeKnownBits(N0).countMinLeadingZeros())) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1); + } + return SDValue(); } @@ -9435,18 +9794,27 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) return SDValue(); + EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); - // Check if the operands are of same type and valid size. unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; - if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) - return SDValue(); - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - SDValue ABD = - DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1); - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD); + // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) + // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) + // NOTE: Extensions must be equivalent. 
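Scalar intuition for the ABD folds here: when both inputs are extended, the subtraction cannot overflow, so the absolute difference fits in the narrow type and can be computed there, then zero-extended. A standalone i8 check (abds8 is a hand-rolled stand-in for ISD::ABDS, not an LLVM API):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Stand-in for abds on i8: the subtract is done in a wider type so it
// cannot overflow, and the absolute value always fits in 8 bits.
static uint8_t abds8(int8_t x, int8_t y) {
  int wide = (int)x - (int)y;
  return (uint8_t)(wide < 0 ? -wide : wide);
}

int main() {
  // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
  for (int x = -128; x <= 127; ++x)
    for (int y = -128; y <= 127; ++y)
      assert((uint32_t)std::abs(x - y) == abds8((int8_t)x, (int8_t)y));
  return 0;
}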
+ if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) { + Op0 = Op0.getOperand(0); + Op1 = Op1.getOperand(0); + SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD); + } + + // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y)) + // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y)) + if (TLI.isOperationLegalOrCustom(ABDOpcode, VT)) + return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1); + + return SDValue(); } SDValue DAGCombiner::visitABS(SDNode *N) { @@ -9472,24 +9840,60 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + SDLoc DL(N); // fold (bswap c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + return DAG.getNode(ISD::BSWAP, DL, VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) - return N0->getOperand(0); + return N0.getOperand(0); // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse // isn't supported, it will be expanded to bswap followed by a manual reversal // of bits in each byte. By placing bswaps before bitreverse, we can remove // the two bswaps if the bitreverse gets expanded. if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) { - SDLoc DL(N); SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap); } + // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2)))))) + // iff c >= bw/2 (i.e. lower half is known zero) + unsigned BW = VT.getScalarSizeInBits(); + if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) { + auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2); + if (ShAmt && ShAmt->getAPIntValue().ult(BW) && + ShAmt->getZExtValue() >= (BW / 2) && + (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) && + TLI.isTruncateFree(VT, HalfVT) && + (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) { + SDValue Res = N0.getOperand(0); + if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2))) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT))); + Res = DAG.getZExtOrTrunc(Res, DL, HalfVT); + Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res); + return DAG.getZExtOrTrunc(Res, DL, VT); + } + } + + // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as + // inverse-shift-of-bswap: + // bswap (X u<< C) --> (bswap X) u>> C + // bswap (X u>> C) --> (bswap X) u<< C + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + N0.hasOneUse()) { + auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (ShAmt && ShAmt->getAPIntValue().ult(BW) && + ShAmt->getZExtValue() % 8 == 0) { + SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0)); + unsigned InverseShift = N0.getOpcode() == ISD::SHL ? 
ISD::SRL : ISD::SHL; + return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1)); + } + } + return SDValue(); } @@ -9740,7 +10144,8 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); } @@ -10023,7 +10428,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) { // Any flags available in a select/setcc fold will be on the setcc as they // migrated from fcmp - Flags = N0.getNode()->getFlags(); + Flags = N0->getFlags(); SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2, N0.getOperand(2)); SelectNode->setFlags(Flags); @@ -10096,14 +10501,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1)); } -bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { +bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, + SelectionDAG &DAG) { if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) return false; + // Only perform the transformation when existing operands can be reused. + if (IndexIsScaled) + return false; + // For now we check only the LHS of the add. SDValue LHS = Index.getOperand(0); SDValue SplatVal = DAG.getSplatValue(LHS); - if (!SplatVal) + if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType()) return false; BasePtr = SplatVal; @@ -10112,23 +10522,29 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { } // Fold sext/zext of index into index type. -bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index, - bool Scaled, SelectionDAG &DAG) { +bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, + SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // It's always safe to look through zero extends. if (Index.getOpcode() == ISD::ZERO_EXTEND) { SDValue Op = Index.getOperand(0); - MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { + IndexType = ISD::UNSIGNED_SCALED; Index = Op; return true; } + if (ISD::isIndexTypeSigned(IndexType)) { + IndexType = ISD::UNSIGNED_SCALED; + return true; + } } - if (Index.getOpcode() == ISD::SIGN_EXTEND) { + // It's only safe to look through sign extends when Index is signed. + if (Index.getOpcode() == ISD::SIGN_EXTEND && + ISD::isIndexTypeSigned(IndexType)) { SDValue Op = Index.getOperand(0); - MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) { + if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { Index = Op; return true; } @@ -10145,24 +10561,25 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { SDValue Scale = MSC->getScale(); SDValue StoreVal = MSC->getValue(); SDValue BasePtr = MSC->getBasePtr(); + ISD::MemIndexType IndexType = MSC->getIndexType(); SDLoc DL(N); // Zap scatters with a zero mask. 
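The index-refinement rules above mirror a basic fact about extensions: a zero-extended index stays correct if reinterpreted as unsigned at the narrow width, while a sign-extended index is only safe to strip when the index type remains signed. A standalone illustration (two's-complement conversion assumed, guaranteed since C++20):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t idx = 0x80000001u;            // negative if viewed as int32_t
  uint64_t zext = (uint64_t)idx;         // zext(i32 -> i64)
  int64_t sext = (int64_t)(int32_t)idx;  // sext(i32 -> i64)
  assert(zext == 0x0000000080000001ull); // unsigned view is preserved
  assert(sext == -2147483647);           // signed view is preserved
  return 0;
}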
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - if (refineUniformBase(BasePtr, Index, DAG)) { + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, - MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType, + MSC->isTruncatingStore()); } - if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) { + if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedScatter( - DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, - MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore()); + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType, + MSC->isTruncatingStore()); } return SDValue(); @@ -10217,7 +10634,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a TRUNC followed by a masked store, fold this into a masked // truncating store. We can do this even if this is already a masked // truncstore. - if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() && + if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && MST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), MST->getMemoryVT(), LegalOperations)) { @@ -10240,26 +10657,25 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { SDValue Scale = MGT->getScale(); SDValue PassThru = MGT->getPassThru(); SDValue BasePtr = MGT->getBasePtr(); + ISD::MemIndexType IndexType = MGT->getIndexType(); SDLoc DL(N); // Zap gathers with a zero mask. if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, PassThru, MGT->getChain()); - if (refineUniformBase(BasePtr, Index, DAG)) { + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - MGT->getMemoryVT(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType(), - MGT->getExtensionType()); + return DAG.getMaskedGather( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } - if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) { + if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; - return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other), - MGT->getMemoryVT(), DL, Ops, - MGT->getMemOperand(), MGT->getIndexType(), - MGT->getExtensionType()); + return DAG.getMaskedGather( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType()); } return SDValue(); @@ -10513,23 +10929,25 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { Other = N1; } + // zext(x) >= y ? trunc(zext(x) - y) : 0 + // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) + // zext(x) > y ? 
trunc(zext(x) - y) : 0 + // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit))) + if (Other && Other.getOpcode() == ISD::TRUNCATE && + Other.getOperand(0).getOpcode() == ISD::SUB && + (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) { + SDValue OpLHS = Other.getOperand(0).getOperand(0); + SDValue OpRHS = Other.getOperand(0).getOperand(1); + if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND) + if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, + DAG, DL)) + return R; + } + if (Other && Other.getNumOperands() == 2) { SDValue CondRHS = RHS; SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); - if (Other.getOpcode() == ISD::SUB && - LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS && - OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) { - // Look for a general sub with unsigned saturation first. - // zext(x) >= y ? x - trunc(y) : 0 - // --> usubsat(x,trunc(umin(y,SatLimit))) - // zext(x) > y ? x - trunc(y) : 0 - // --> usubsat(x,trunc(umin(y,SatLimit))) - if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) - return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG, - DL); - } - if (OpLHS == LHS) { // Look for a general sub with unsigned saturation first. // x >= y ? x-y : 0 --> usubsat x, y @@ -10560,8 +10978,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. - // FIXME: Would it be better to use computeKnownBits to determine - // whether it's safe to decanonicalize the xor? + // FIXME: Would it be better to use computeKnownBits to + // determine whether it's safe to decanonicalize the xor? // x s< 0 ? x^C : 0 --> usubsat x, C APInt SplatValue; if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR && @@ -10627,17 +11045,18 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); - if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isZero()) - return N2; // cond always true -> true val - else - return N3; // cond always false -> false val - } else if (SCC->isUndef()) { - // When the condition is UNDEF, just return the first operand. This is - // coherent the DAG creation, no setcc node is created in this case + // cond always true -> true val + // cond always false -> false val + if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) + return SCCC->isZero() ? N3 : N2; + + // When the condition is UNDEF, just return the first operand. 
This is + // consistent with the DAG creation; no setcc node is created in this case + if (SCC->isUndef()) return N2; - } else if (SCC.getOpcode() == ISD::SETCC) { - // Fold to a simpler select_cc + + // Fold to a simpler select_cc + if (SCC.getOpcode() == ISD::SETCC) { SDValue SelectOp = DAG.getNode( ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); @@ -10920,9 +11339,8 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); - for (SDNode::use_iterator UI = N0.getNode()->use_begin(), - UE = N0.getNode()->use_end(); - UI != UE; ++UI) { + for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE; + ++UI) { SDNode *User = *UI; if (User == N) continue; @@ -11254,9 +11672,12 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { + // TODO: isFixedLengthVector() should be removed, with any negative effects on + // code generation handled by that target's implementation of + // isVectorLoadExtDesirable(). if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isFixedLengthVector() || !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -11480,6 +11901,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // sext(undef) = 0 because the top bits will all be the same. + if (N0.isUndef()) + return DAG.getConstant(0, DL, VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11649,10 +12074,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Return SDValue here as the xor should have already been replaced in // this sext. return SDValue(); - } else { - // Return a new sext with the new xor. - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); } + + // Return a new sext with the new xor. 
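Why sext(undef) can fold to 0: sign-extension only replicates the source's top bit, so every bit it introduces equals every other, and the all-zero value is one legal choice. The replication itself is the usual xor/sub trick, sketched here for i8 -> i32 (assumes two's-complement conversion semantics, guaranteed since C++20):

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v < 256; ++v) {
    uint32_t x = (uint32_t)v;
    // Branch-free sign extension: bias the sign bit, then subtract it.
    int32_t sext = (int32_t)(((x & 0xFF) ^ 0x80) - 0x80);
    assert(sext == (int32_t)(int8_t)v);
  }
  return 0;
}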
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); } SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); @@ -11725,6 +12150,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // zext(undef) = 0 + if (N0.isUndef()) + return DAG.getConstant(0, SDLoc(N), VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -11984,6 +12413,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // aext(undef) = undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -12021,11 +12454,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), N0.getValueType())) { SDLoc DL(N); - SDValue X = N0.getOperand(0).getOperand(0); - X = DAG.getAnyExtOrTrunc(X, DL, VT); - APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, DL, VT, - X, DAG.getConstant(Mask, DL, VT)); + SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT); + SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1)); + assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!"); + return DAG.getNode(ISD::AND, DL, VT, X, Y); } // fold (aext (load x)) -> (aext (truncate (extload x))) @@ -12153,13 +12585,9 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { // This eliminates the later assert: // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN + SDLoc DL(N); SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); - assert(BigA_AssertVT.bitsLE(N0.getValueType()) && - "Asserting zero/sign-extended bits to a type larger than the " - "truncated destination does not provide information"); - - SDLoc DL(N); EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? 
AssertVT : BigA_AssertVT; SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), @@ -12175,10 +12603,6 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { Opcode == ISD::AssertZext) { SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); - assert(BigA_AssertVT.bitsLE(N0.getValueType()) && - "Asserting zero/sign-extended bits to a type larger than the " - "truncated destination does not provide information"); - if (AssertVT.bitsLT(BigA_AssertVT)) { SDLoc DL(N); SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), @@ -12296,13 +12720,11 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { unsigned ActiveBits = 0; if (Mask.isMask()) { ActiveBits = Mask.countTrailingOnes(); - } else if (Mask.isShiftedMask()) { - ShAmt = Mask.countTrailingZeros(); - APInt ShiftedMask = Mask.lshr(ShAmt); - ActiveBits = ShiftedMask.countTrailingOnes(); + } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) { HasShiftedOffset = true; - } else + } else { return SDValue(); + } ExtType = ISD::ZEXTLOAD; ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -12919,21 +13341,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) - // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) - // When the adde's carry is not used. - if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && - N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && - // We only do for addcarry before legalize operation - ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || - TLI.isOperationLegal(N0.getOpcode(), VT))) { - SDLoc SL(N); - auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); - auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - auto VTs = DAG.getVTList(VT, N0->getValueType(1)); - return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); - } - // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract @@ -12978,6 +13385,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } break; + case ISD::ADDE: + case ISD::ADDCARRY: + // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) + // When the adde's carry is not used. 
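Moving the truncate inside ADDE/ADDCARRY is justified by modular arithmetic: the low bits of a sum depend only on the low bits of the addends (plus the carry-in, which contributes identically on both sides). The scalar core of that argument:

#include <cassert>
#include <cstdint>

int main() {
  // (trunc (add X, Y)) == (add (trunc X), (trunc Y)) modulo 2^32
  uint64_t vals[] = {0u, 0xFFFFFFFFull, 0x0123456789ABCDEFull};
  for (uint64_t x : vals)
    for (uint64_t y : vals)
      assert((uint32_t)(x + y) == (uint32_t)((uint32_t)x + (uint32_t)y));
  return 0;
}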
+ // We only do this for ADDCARRY before operation legalization + if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) || + TLI.isOperationLegal(N0.getOpcode(), VT)) && + N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) { + SDLoc DL(N); + SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); + SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); + SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2)); + } + break; case ISD::USUBSAT: // Truncate the USUBSAT only if LHS is a known zero-extension; it's not // enough to know that the upper bits are zero: we must ensure that we don't @@ -13111,7 +13534,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (!LegalTypes || (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() && TLI.isTypeLegal(VT.getVectorElementType()))) && - N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && + N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() && cast<BuildVectorSDNode>(N0)->isConstant()) return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), VT.getVectorElementType()); @@ -13179,8 +13602,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && - N0.getNode()->hasOneUse() && VT.isInteger() && - !VT.isVector() && !N0.getValueType().isVector()) { + N0->hasOneUse() && VT.isInteger() && !VT.isVector() && + !N0.getValueType().isVector()) { SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); @@ -13228,9 +13651,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (xor (bitcast cst), (bitcast x)), 0), // signbit) // (xor (bitcast cst) (build_pair flipbit, flipbit)) - if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(0)) && - VT.isInteger() && !VT.isVector()) { + if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() && + !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { @@ -13312,8 +13735,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); - if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || - ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + if (Op.isUndef() || isAnyConstantBuildVector(Op)) return DAG.getBitcast(VT, Op); return SDValue(); }; @@ -13353,6 +13775,14 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; + // Fold freeze(bitcast(x)) -> bitcast(freeze(x)). + // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold. + if (N0.getOpcode() == ISD::BITCAST) + return DAG.getBitcast(N->getValueType(0), + DAG.getNode(ISD::FREEZE, SDLoc(N0), + N0.getOperand(0).getValueType(), + N0.getOperand(0))); + + return SDValue(); } @@ -13444,7 +13874,7 @@ static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { // Returns true if `N` can assume no infinities involved in its computation. 
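The fneg/fabs-through-bitcast folds earlier in this hunk come down to IEEE-754's sign-magnitude layout: negation flips bit 31 and fabs clears it. A standalone check using memcpy as the bitcast (a sketch only; NaN payloads are not exercised):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bits_of(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));  // the scalar equivalent of a bitcast
  return u;
}

int main() {
  for (float f : {1.5f, -0.0f, 3.25e8f, -7.0f}) {
    // (bitcast (fneg x)) -> (xor (bitcast x), signbit)
    assert(bits_of(-f) == (bits_of(f) ^ 0x80000000u));
    // (bitcast (fabs x)) -> (and (bitcast x), ~signbit)
    assert(bits_of(std::fabs(f)) == (bits_of(f) & 0x7FFFFFFFu));
  }
  return 0;
}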
static bool hasNoInfs(const TargetOptions &Options, SDValue N) { - return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs(); + return Options.NoInfsFPMath || N->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. @@ -13498,7 +13928,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) { - if (N0.getNode()->use_size() > N1.getNode()->use_size()) + if (N0->use_size() > N1->use_size()) std::swap(N0, N1); } @@ -13728,7 +14158,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), // prefer to fold the multiply with fewer uses. if (isContractableFMUL(N0) && isContractableFMUL(N1) && - (N0.getNode()->use_size() > N1.getNode()->use_size())) { + (N0->use_size() > N1->use_size())) { // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) if (SDValue V = tryToFoldXSubYZ(N0, N1)) return V; @@ -14851,7 +15281,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1})) return C; - + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -15174,7 +15604,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { // This means this is also safe for a signed input and unsigned output, since // a negative input would lead to undefined behavior. unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; - unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits(); unsigned ActualSize = std::min(InputSize, OutputSize); const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); @@ -15265,7 +15695,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) - if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(Tmp.getNode()); @@ -15709,7 +16139,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || - Ptr.getNode()->hasOneUse()) + Ptr->hasOneUse()) return false; // Ask the target to do addressing mode selection. @@ -15769,8 +16199,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; if (isa<ConstantSDNode>(Offset)) - for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), - UE = BasePtr.getNode()->use_end(); + for (SDNode::use_iterator UI = BasePtr->use_begin(), + UE = BasePtr->use_end(); UI != UE; ++UI) { SDUse &Use = UI.getUse(); // Skip the use that is Ptr and uses of other results from BasePtr's @@ -15808,7 +16238,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. 
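For the FMA combines above it is worth recalling why they are gated on contraction being allowed: fma performs a single rounding where mul-plus-add performs two, so results can legitimately differ in the last bit. A small demonstration (compile with -ffp-contract=off so the compiler does not contract the first expression itself):

#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 + 0x1.0p-27, b = 1.0 + 0x1.0p-27, c = -1.0;
  double separate = a * b + c;       // product rounded, then added
  double fused = std::fma(a, b, c);  // one rounding at the very end
  // Prints two different values: the 2^-54 term survives only in 'fused'.
  std::printf("separate = %a\nfused    = %a\n", separate, fused);
  return 0;
}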
bool RealUse = false; - for (SDNode *Use : Ptr.getNode()->uses()) { + for (SDNode *Use : Ptr->uses()) { if (Use == N) continue; if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) @@ -15841,7 +16271,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); dbgs() << '\n'); + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -15931,7 +16361,7 @@ static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, return false; SmallPtrSet<const SDNode *, 32> Visited; - for (SDNode *Use : BasePtr.getNode()->uses()) { + for (SDNode *Use : BasePtr->uses()) { if (Use == Ptr.getNode()) continue; @@ -15968,7 +16398,7 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, const TargetLowering &TLI) { if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, Ptr, TLI) || - Ptr.getNode()->hasOneUse()) + Ptr->hasOneUse()) return nullptr; // Try turning it into a post-indexed load / store except when @@ -16028,9 +16458,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); - dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; + Result.dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -16271,7 +16700,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); - dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); + dbgs() << "\nWith chain: "; Chain.dump(&DAG); dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); @@ -16302,7 +16731,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else Index = DAG.getUNDEF(N->getValueType(1)); LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); - dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); + dbgs() << "\nWith: "; Undef.dump(&DAG); dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); @@ -17014,11 +17443,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization (and the target doesn't explicitly think this is a bad idea). + // legalization. If the source type is legal, but the store type isn't, see + // if we can use a truncating store. MVT VT = MVT::getIntegerVT(NumBytes * 8); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!DC->isTypeLegal(VT)) + bool UseTruncStore; + if (DC->isTypeLegal(VT)) + UseTruncStore = false; + else if (TLI.isTypeLegal(IVal.getValueType()) && + TLI.isTruncStoreLegal(IVal.getValueType(), VT)) + UseTruncStore = true; + else return SDValue(); + // Check that the target doesn't think this is a bad idea. 
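The shrink-store transform above, in scalar terms: a read-modify-write that only changes one byte of a word can become a single byte store at the right offset. A sketch assuming a little-endian host (the offset arithmetic is exactly what flips for big-endian):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t wide = 0xAABBCCDDu, narrow = wide;
  uint8_t v = 0x5E;

  // Wide form: load the word, splice in byte 1, store the word.
  wide = (wide & ~0x0000FF00u) | ((uint32_t)v << 8);

  // Narrow form: store just the byte that changed (offset 1 on LE).
  std::memcpy((uint8_t *)&narrow + 1, &v, 1);

  assert(wide == narrow);
  return 0;
}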
if (St->getMemOperand() && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, *St->getMemOperand())) @@ -17046,10 +17483,15 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL); } + ++OpsNarrowed; + if (UseTruncStore) + return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), + VT, St->getOriginalAlign()); + // Truncate down to the new size. IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); - ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, St->getPointerInfo().getWithOffset(StOffset), @@ -17070,11 +17512,15 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Ptr = ST->getBasePtr(); EVT VT = Value.getValueType(); - if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + if (ST->isTruncatingStore() || VT.isVector()) return SDValue(); unsigned Opc = Value.getOpcode(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + !Value.hasOneUse()) + return SDValue(); + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst // is a byte mask indicating a consecutive number of bytes, check to see if // Y is known to provide just those bytes. If so, we try to replace the @@ -17099,8 +17545,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (!EnableReduceLoadOpStoreWidth) return SDValue(); - if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || - Value.getOperand(1).getOpcode() != ISD::Constant) + if (Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); SDValue N0 = Value.getOperand(0); @@ -17256,14 +17701,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { // (A + c1) * c3 // (A + c2) * c3 // We're checking for cases where we have common "c3 * A" expressions. -bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, - SDValue &AddNode, - SDValue &ConstNode) { +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode, + SDValue ConstNode) { APInt Val; // If the add only has one use, and the target thinks the folding is // profitable or does not lead to worse code, this would be OK to do. - if (AddNode.getNode()->hasOneUse() && + if (AddNode->hasOneUse() && TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; @@ -17397,7 +17841,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( if (isa<ConstantFPSDNode>(Val)) { // Not clear how to truncate FP values. 
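Backing up to isMulAddWithConstProfitable: the profitability question exists because distributing the multiply exposes a common subexpression. Modular arithmetic makes the rewrite itself unconditional, as a quick standalone check shows (constants arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xDEAD1234u, c1 = 17, c2 = 99, c3 = 31;
  uint32_t common = A * c3;  // computed once, shared by both users
  assert((A + c1) * c3 == common + c1 * c3);
  assert((A + c2) * c3 == common + c2 * c3);
  return 0;
}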
return false; - } else if (auto *C = dyn_cast<ConstantSDNode>(Val)) + } + + if (auto *C = dyn_cast<ConstantSDNode>(Val)) Val = DAG.getConstant(C->getAPIntValue() .zextOrTrunc(Val.getValueSizeInBits()) .zextOrTrunc(ElementSizeBits), SDLoc(C), IntMemVT); @@ -17491,7 +17937,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( if (!UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), - FirstInChain->getAlign(), Flags.getValue(), AAInfo); + FirstInChain->getAlign(), *Flags, AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17503,7 +17949,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlign(), Flags.getValue(), AAInfo); + FirstInChain->getAlign(), *Flags, AAInfo); } // Replace all merged stores with the new store. @@ -17671,11 +18117,9 @@ void DAGCombiner::getStoreMergeCandidates( } } -// We need to check that merging these stores does not cause a loop in -// the DAG. Any store candidate may depend on another candidate -// indirectly through its operand (we already consider dependencies -// through the chain). Check in parallel by searching up from -// non-chain operands of candidates. +// We need to check that merging these stores does not cause a loop in the +// DAG. Any store candidate may depend on another candidate indirectly through +// its operands. Check in parallel by searching up from operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode) { @@ -17709,8 +18153,13 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SDNode *N = StoreNodes[i].MemNode; // Of the 4 Store Operands: // * Chain (Op 0) -> We have already considered these - // in candidate selection and can be - // safely ignored + // in candidate selection, but only by following the + // chain dependencies. We could still have a chain + // dependency to a load, that has a non-chain dep to + // another load, that depends on a store, etc. So it is + // possible to have dependencies that consist of a mix + // of chain and non-chain deps, and we need to include + // chain operands in the analysis here. // * Value (Op 1) -> Cycles may happen (e.g. through load chains) // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, // but aren't necessarily from the same base node, so // * (Op 3) -> Represents the pre or post-indexing offset (or undef for // non-indexed stores). Not constant on all targets (e.g. ARM) // and so can participate in a cycle. - for (unsigned j = 1; j < N->getNumOperands(); ++j) + for (unsigned j = 0; j < N->getNumOperands(); ++j) Worklist.push_back(N->getOperand(j).getNode()); } // Search through DAG. We can stop early if we find a store node. 
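The dependency walk described above is, at heart, an upward reachability search: starting from every operand of every candidate store, if the search can reach a candidate, merging would create a cycle. A schematic of that check on a toy node graph (not the LLVM implementation; Node and reaches are invented for illustration):

#include <cassert>
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// Depth-first search upward through operands, with a visited set so
// shared subgraphs are only walked once.
static bool reaches(Node *From, const std::unordered_set<Node *> &Targets,
                    std::unordered_set<Node *> &Visited) {
  if (!Visited.insert(From).second)
    return false;
  if (Targets.count(From))
    return true;
  for (Node *Op : From->Operands)
    if (reaches(Op, Targets, Visited))
      return true;
  return false;
}

int main() {
  Node A, B, C;
  B.Operands = {&A};  // B depends on A
  C.Operands = {&B};  // C depends on B, hence transitively on A
  std::unordered_set<Node *> Candidates = {&A, &C};
  std::unordered_set<Node *> Visited;
  bool Cycle = false;
  for (Node *Op : C.Operands)  // walk up from C's operands
    Cycle = Cycle || reaches(Op, Candidates, Visited);
  assert(Cycle && "merging A and C would form a cycle through B");
  return 0;
}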
@@ -17793,7 +18242,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); unsigned LastLegalType = 1; unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; @@ -17881,7 +18330,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); @@ -17920,7 +18369,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( while (NumConsecutiveStores >= 2) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); + Align FirstStoreAlign = FirstInChain->getAlign(); unsigned NumStoresToMerge = 1; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { // Find a legal type for the vector store. @@ -17951,7 +18400,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( // improved. Drop as many candidates as we can here. unsigned NumSkip = 1; while ((NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign)) NumSkip++; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); @@ -18248,7 +18697,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, for (unsigned i = 0; i < NumElem; ++i) { SDValue Val = StoreNodes[i].MemNode->getOperand(1); CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) + if (Val->use_empty()) recursivelyDeleteUnusedNodes(Val.getNode()); } @@ -18398,6 +18847,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { default: llvm_unreachable("Unknown FP type"); case MVT::f16: // We don't do this for these yet. + case MVT::bf16: case MVT::f80: case MVT::f128: case MVT::ppcf128: @@ -18405,7 +18855,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::f32: if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). bitcastToAPInt().getZExtValue(), SDLoc(CFP), MVT::i32); @@ -18417,7 +18866,6 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). getZExtValue(), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, DL, Tmp, @@ -18611,7 +19059,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // truncating store. We can do this even if this is already a truncstore. 
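The truncating-store fold above has a simple memory-level reading: storing trunc(x) and doing a truncating store of x write the same bytes. A sketch on a little-endian host (endianness changes which bytes are selected, not the equality):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t x = 0x1122334455667788ull;
  uint16_t a = (uint16_t)x;  // explicit trunc, then a plain 16-bit store
  uint16_t b;
  std::memcpy(&b, &x, 2);    // "truncstore": the low two bytes of x
  assert(a == b);
  return 0;
}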
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) && - Value.getNode()->hasOneUse() && ST->isUnindexed() && + Value->hasOneUse() && ST->isUnindexed() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), ST->getMemoryVT(), LegalOperations)) { return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), @@ -18874,6 +19322,14 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { } } + // If we failed to find a match, see if we can replace an UNDEF shuffle + // operand. + if (ElementOffset == -1 && Y.isUndef() && + InsertVal0.getValueType() == Y.getValueType()) { + ElementOffset = Mask.size(); + Y = InsertVal0; + } + if (ElementOffset != -1) { SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); @@ -18972,10 +19428,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { if (VT.isScalableVector()) return DAG.getSplatVector(VT, DL, InVal); - else { - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); - return DAG.getBuildVector(VT, DL, Ops); - } + + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); + return DAG.getBuildVector(VT, DL, Ops); } return SDValue(); } @@ -18987,9 +19442,19 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) return Shuf; + // Handle <1 x ???> vector insertion special cases. + if (VT.getVectorNumElements() == 1) { + // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + InVal.getOperand(0).getValueType() == VT && + isNullConstant(InVal.getOperand(1))) + return InVal.getOperand(0); + } + // Canonicalize insert_vector_elt dag nodes. // Example: // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) @@ -19010,36 +19475,84 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } - // If we can't generate a legal BUILD_VECTOR, exit - if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) - return SDValue(); + // Attempt to fold the insertion into a legal BUILD_VECTOR. + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { + assert(Ops.size() == NumElts && "Unexpected vector size"); - // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially - // be converted to a BUILD_VECTOR). Fill in the Ops vector with the - // vector elements. - SmallVector<SDValue, 8> Ops; - // Do not combine these two vectors if the output vector will not replace - // the input vector. - if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { - Ops.append(InVec.getNode()->op_begin(), - InVec.getNode()->op_end()); - } else if (InVec.isUndef()) { - Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); - } else { - return SDValue(); - } - assert(Ops.size() == NumElts && "Unexpected vector size"); + // Insert the element + if (Elt < Ops.size()) { + // All the operands of BUILD_VECTOR must have the same type; + // we enforce that here. + EVT OpVT = Ops[0].getValueType(); + Ops[Elt] = + OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + } + + // Return the new vector + return DAG.getBuildVector(VT, DL, Ops); + }; + + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially + // be converted to a BUILD_VECTOR). 
Fill in the Ops vector with the
+    // vector elements.
+    SmallVector<SDValue, 8> Ops;
+
+    // Do not combine these two vectors if the output vector will not replace
+    // the input vector.
+    if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
+      Ops.append(InVec->op_begin(), InVec->op_end());
+      return UpdateBuildVector(Ops);
+    }
+
+    if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
+      Ops.push_back(InVec.getOperand(0));
+      Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
+      return UpdateBuildVector(Ops);
+    }
+
+    if (InVec.isUndef()) {
+      Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
+      return UpdateBuildVector(Ops);
+    }
+
+    // If we're inserting into the end of a vector as part of a sequence, see
+    // if we can create a BUILD_VECTOR by following the sequence back up the
+    // chain.
+    if (Elt == (NumElts - 1)) {
+      SmallVector<SDValue> ReverseInsertions;
+      ReverseInsertions.push_back(InVal);
+
+      EVT MaxEltVT = InVal.getValueType();
+      SDValue CurVec = InVec;
+      for (unsigned I = 1; I != NumElts; ++I) {
+        if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
+          break;
-  // Insert the element
-  if (Elt < Ops.size()) {
-    // All the operands of BUILD_VECTOR must have the same type;
-    // we enforce that here.
-    EVT OpVT = Ops[0].getValueType();
-    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
+        auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
+        if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
+          break;
+        SDValue CurVal = CurVec.getOperand(1);
+        ReverseInsertions.push_back(CurVal);
+        if (VT.isInteger()) {
+          EVT CurValVT = CurVal.getValueType();
+          MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
+        }
+        CurVec = CurVec.getOperand(0);
+      }
+
+      if (ReverseInsertions.size() == NumElts) {
+        for (unsigned I = 0; I != NumElts; ++I) {
+          SDValue Val = ReverseInsertions[(NumElts - 1) - I];
+          Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
+          Ops.push_back(Val);
+        }
+        return DAG.getBuildVector(VT, DL, Ops);
+      }
+    }
   }

-  // Return the new vector
-  return DAG.getBuildVector(VT, DL, Ops);
+  return SDValue();
 }

 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
@@ -19088,47 +19601,33 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
                                                InVecVT, EltNo);

-  // The replacement we need to do here is a little tricky: we need to
-  // replace an extractelement of a load with a load.
-  // Use ReplaceAllUsesOfValuesWith to do the replacement.
-  // Note that this replacement assumes that the extractvalue is the only
-  // use of the load; that's okay because we don't want to perform this
-  // transformation in other cases anyway.
+  // We are replacing a vector load with a scalar load. The new load must have
+  // identical memory op ordering to the original.
   SDValue Load;
-  SDValue Chain;
   if (ResultVT.bitsGT(VecEltVT)) {
     // If the result type of vextract is wider than the load, then issue an
     // extending load instead.
-    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
-                                                  VecEltVT)
-                                   ? ISD::ZEXTLOAD
-                                   : ISD::EXTLOAD;
-    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
-                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
-                          Alignment, OriginalLoad->getMemOperand()->getFlags(),
+    ISD::LoadExtType ExtType =
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ?
ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(), + NewPtr, MPI, VecEltVT, Alignment, + OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); - Chain = Load.getValue(1); + DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment, - OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); - Chain = Load.getValue(1); + // The result type is narrower or the same width as the vector element + Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI, + Alignment, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); + DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load); if (ResultVT.bitsLT(VecEltVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load); else Load = DAG.getBitcast(ResultVT, Load); } - WorklistRemover DeadNodes(*this); - SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); - // Since we're explicitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorklistWithUsers(Load.getNode()); ++OpsNarrowed; - return SDValue(EVE, 0); + return Load; } /// Transform a vector binary operation into a scalar binary operation by moving @@ -19140,7 +19639,7 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast<ConstantSDNode>(Index); if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() || - Vec.getNode()->getNumValues() != 1) + Vec->getNumValues() != 1) return SDValue(); // Targets may want to avoid this to prevent an expensive register transfer. @@ -19196,8 +19695,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // EXTRACT_VECTOR_ELT may widen the extracted vector. 
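scalarizeExtractedVectorLoad above replaces extract_vector_elt(load(p), i) with a single scalar (possibly extending) load at p + i * sizeof(element), and now ties the new load to the original's ordering via makeEquivalentMemoryOrdering instead of a manual ReplaceAllUsesOfValuesWith. The memory shape of the rewrite, as a scalar sketch with a hypothetical helper name:

    #include <cstddef>
    #include <cstring>

    // extract_elt(load <4 x float>, i) rewritten as one narrow load: read
    // only element i at base + i * sizeof(float) instead of loading the
    // whole vector and extracting.
    float extractEltViaScalarLoad(const unsigned char *VecMem, size_t EltNo) {
      float Elt;
      std::memcpy(&Elt, VecMem + EltNo * sizeof(float), sizeof(Elt));
      return Elt; // one 4-byte load instead of a 16-byte load plus extract
    }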
SDValue InOp = VecOp.getOperand(0); if (InOp.getValueType() != ScalarVT) { - assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); - return DAG.getSExtOrTrunc(InOp, DL, ScalarVT); + assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() && + InOp.getValueType().bitsGT(ScalarVT)); + return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp); } return InOp; } @@ -19655,7 +20155,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { if (!isa<ConstantSDNode>(ShiftAmtVal)) return SDValue(); - uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1); + uint64_t ShiftAmt = In.getConstantOperandVal(1); // The extracted value is not extracted at the right position if (ShiftAmt != i * ScalarTypeBitsize) @@ -20096,18 +20596,39 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { int Left = 2 * In; int Right = 2 * In + 1; SmallVector<int, 8> Mask(NumElems, -1); - for (unsigned i = 0; i != NumElems; ++i) { - if (VectorMask[i] == Left) { - Mask[i] = i; - VectorMask[i] = In; - } else if (VectorMask[i] == Right) { - Mask[i] = i + NumElems; - VectorMask[i] = In; + SDValue L = Shuffles[Left]; + ArrayRef<int> LMask; + bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE && + L.use_empty() && L.getOperand(1).isUndef() && + L.getOperand(0).getValueType() == L.getValueType(); + if (IsLeftShuffle) { + LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask(); + L = L.getOperand(0); + } + SDValue R = Shuffles[Right]; + ArrayRef<int> RMask; + bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE && + R.use_empty() && R.getOperand(1).isUndef() && + R.getOperand(0).getValueType() == R.getValueType(); + if (IsRightShuffle) { + RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask(); + R = R.getOperand(0); + } + for (unsigned I = 0; I != NumElems; ++I) { + if (VectorMask[I] == Left) { + Mask[I] = I; + if (IsLeftShuffle) + Mask[I] = LMask[I]; + VectorMask[I] = In; + } else if (VectorMask[I] == Right) { + Mask[I] = I + NumElems; + if (IsRightShuffle) + Mask[I] = RMask[I] + NumElems; + VectorMask[I] = In; } } - Shuffles[In] = - DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); + Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask); } } return Shuffles[0]; @@ -20695,7 +21216,7 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = Extract->getOperand(0); unsigned BinOpcode = BinOp.getOpcode(); - if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) + if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1) return SDValue(); EVT VecVT = BinOp.getValueType(); @@ -20744,7 +21265,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); unsigned BOpcode = BinOp.getOpcode(); - if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) + if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1) return SDValue(); // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be @@ -20803,8 +21324,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, BinOp.getOperand(0), NewExtIndex); SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(1), NewExtIndex); - SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, - BinOp.getNode()->getFlags()); + SDValue NarrowBinOp = + DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags()); return 
DAG.getBitcast(VT, NarrowBinOp);
 }

@@ -21085,6 +21606,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
     }
   }

+  // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
+  if (V.getOpcode() == ISD::SPLAT_VECTOR)
+    if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
+      if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
+        return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
+
   // Try to move vector bitcast after extract_subv by scaling extraction index:
   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
   if (V.getOpcode() == ISD::BITCAST &&
@@ -21450,9 +21977,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   if (SVT != VT.getScalarType())
     for (SDValue &Op : Ops)
-      Op = TLI.isZExtFree(Op.getValueType(), SVT)
-               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
-               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+      Op = Op.isUndef() ? DAG.getUNDEF(SVT)
+                        : (TLI.isZExtFree(Op.getValueType(), SVT)
+                               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+                               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
 }

@@ -21582,6 +22110,13 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                         SelectionDAG &DAG) {
   if (!Shuf->getOperand(1).isUndef())
     return SDValue();
+
+  // If the inner operand is a known splat with no undefs, just return that
+  // directly.
+  // TODO: Create DemandedElts mask from Shuf's mask.
+  // TODO: Allow undef elements and merge with the shuffle code below.
+  if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
+    return Shuf->getOperand(0);
+
   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
   if (!Splat || !Splat->isSplat())
     return SDValue();
@@ -21628,6 +22163,53 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                               NewMask);
 }

+// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
+// the mask can be treated as a larger type.
+static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
+                                       SelectionDAG &DAG,
+                                       const TargetLowering &TLI,
+                                       bool LegalOperations) {
+  SDValue Op0 = SVN->getOperand(0);
+  SDValue Op1 = SVN->getOperand(1);
+  EVT VT = SVN->getValueType(0);
+  if (Op0.getOpcode() != ISD::BITCAST)
+    return SDValue();
+  EVT InVT = Op0.getOperand(0).getValueType();
+  if (!InVT.isVector() ||
+      (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
+                          Op1.getOperand(0).getValueType() != InVT)))
+    return SDValue();
+  if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
+      (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
+    return SDValue();
+
+  int VTLanes = VT.getVectorNumElements();
+  int InLanes = InVT.getVectorNumElements();
+  if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
+      (LegalOperations &&
+       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
+    return SDValue();
+  int Factor = VTLanes / InLanes;
+
+  // Check that each group of lanes in the mask is either undef or makes a
+  // valid mask for the wider lane type.
+  ArrayRef<int> Mask = SVN->getMask();
+  SmallVector<int> NewMask;
+  if (!widenShuffleMaskElts(Factor, Mask, NewMask))
+    return SDValue();
+
+  if (!TLI.isShuffleMaskLegal(NewMask, InVT))
+    return SDValue();
+
+  // Create the new shuffle with the new mask and bitcast it back to the
+  // original type.
+  SDLoc DL(SVN);
+  Op0 = Op0.getOperand(0);
+  Op1 = Op1.isUndef() ?
DAG.getUNDEF(InVT) : Op1.getOperand(0); + SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask); + return DAG.getBitcast(VT, NewShuf); +} + /// Combine shuffle of shuffle of the form: /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, @@ -21839,7 +22421,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && - TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { + TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) SDValue L = N0.getOperand(0), R = N0.getOperand(1); @@ -21848,13 +22430,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL); SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); - SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, - N0.getNode()->getFlags()); + SDValue NewBO = + DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags()); SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO); SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0); return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask); } + // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x) + // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x) + if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0) + return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0)); + + if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT) + if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2))) + if (Idx->getAPIntValue() == SplatIndex) + return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1)); + } + // If this is a bit convert that changes the element type of the vector but // not the number of vector elements, look through it. Be careful not to // look though conversions that change things like v4f32 to v2f64. @@ -22078,6 +22673,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // Match shuffles of bitcasts, so long as the mask can be treated as the + // larger type. + if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations)) + return V; + // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false @@ -22409,6 +23009,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) return N1.getOperand(0); + // Simplify scalar inserts into an undef vector: + // insert_subvector undef, (splat X), N2 -> splat X + if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); + // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. // i.e. 
INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> @@ -22556,6 +23161,16 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_bf16 (bf16_to_fp op)) -> op + if (N0->getOpcode() == ISD::BF16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N0.getValueType(); @@ -22583,6 +23198,19 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0); } + // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val) + // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val) + if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && + TLI.isTypeLegal(N0.getOperand(1).getValueType())) { + SDValue Vec = N0.getOperand(0); + SDValue Subvec = N0.getOperand(1); + if ((Opcode == ISD::VECREDUCE_OR && + (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) || + (Opcode == ISD::VECREDUCE_AND && + (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec)))) + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec); + } + return SDValue(); } @@ -22886,7 +23514,7 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. if (SCC.getOpcode() == ISD::SELECT_CC) { - const SDNodeFlags Flags = N0.getNode()->getFlags(); + const SDNodeFlags Flags = N0->getFlags(); SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), @@ -23556,6 +24184,27 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return SDValue(); } +/// Given an ISD::SREM node expressing a remainder by constant power of 2, +/// return a DAG expression that will generate the same value. +SDValue DAGCombiner::BuildSREMPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (C->isZero()) + return SDValue(); + + SmallVector<SDNode *, 8> Built; + if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) { + for (SDNode *N : Built) + AddToWorklist(N); + return S; + } + + return SDValue(); +} + /// Determines the LogBase2 value for a non-null input value using the /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { @@ -23865,9 +24514,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { auto &Size0 = MUC0.NumBytes; auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && - OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && - SrcValOffset1 % *Size1 == 0) { + Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 && + SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); @@ -23886,8 +24534,8 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && - Size0.hasValue() && Size1.hasValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 && + Size1) { // Use alias analysis information. 
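The new BuildSREMPow2 helper earlier in this hunk defers the actual instruction sequence to the target hook TLI.BuildSREMPow2. For reference, one standard branch-free expansion of a signed remainder by 2^K, shown here as a scalar sketch with a hypothetical name (not the code any particular target emits):

    #include <cassert>
    #include <cstdint>

    // Branch-free signed remainder by a power of two (Pow2 = 1 << K), with
    // C semantics: the result takes the sign of the dividend. The "bias"
    // trick adds Pow2-1 before masking when X is negative, then removes it.
    int32_t sremPow2(int32_t X, uint32_t K) {
      assert(K >= 1 && K < 32 && "shift amounts below must stay in range");
      uint32_t Pow2 = 1u << K;
      int32_t Sign = X >> 31;                     // all-ones iff X < 0
      uint32_t Bias = uint32_t(Sign) >> (32 - K); // Pow2-1 iff X < 0, else 0
      return int32_t((uint32_t(X) + Bias) & (Pow2 - 1)) - int32_t(Bias);
    }

As a check, sremPow2(-7, 2) yields -3, matching -7 % 4 in C++, while sremPow2(7, 2) yields 3.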
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; @@ -23920,7 +24568,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, unsigned Depth = 0; // Attempt to improve chain by a single step - std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool { + auto ImproveChain = [&](SDValue &C) -> bool { switch (C.getOpcode()) { case ISD::EntryToken: // No need to mark EntryToken. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 87a1ebe4c1db..ff5779967e22 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -72,7 +72,6 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -94,7 +93,6 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1408,16 +1406,6 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { } bool FastISel::selectBitCast(const User *I) { - // If the bitcast doesn't change the type, just use the operand value. - if (I->getType() == I->getOperand(0)->getType()) { - Register Reg = getRegForValue(I->getOperand(0)); - if (!Reg) - return false; - updateValueMap(I, Reg); - return true; - } - - // Bitcasts of other values become reg-reg copies or BITCAST operators. EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); EVT DstEVT = TLI.getValueType(DL, I->getType()); if (SrcEVT == MVT::Other || DstEVT == MVT::Other || @@ -1431,23 +1419,14 @@ bool FastISel::selectBitCast(const User *I) { if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - // First, try to perform the bitcast by inserting a reg-reg copy. - Register ResultReg; + // If the bitcast doesn't change the type, just use the operand value. if (SrcVT == DstVT) { - const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); - const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); - // Don't attempt a cross-class copy. It will likely fail. - if (SrcClass == DstClass) { - ResultReg = createResultReg(DstClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); - } + updateValueMap(I, Op0); + return true; } - // If the reg-reg copy failed, select a BITCAST opcode. - if (!ResultReg) - ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0); - + // Otherwise, select a BITCAST opcode. + Register ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0); if (!ResultReg) return false; @@ -2251,6 +2230,11 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { if (!MRI.hasOneUse(LoadReg)) return false; + // If the register has fixups, there may be additional uses through a + // different alias of the register. 
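The mayAlias disambiguation in the two hunks above reads as plain interval arithmetic: two same-sized accesses that are aligned beyond their size, and whose offsets are multiples of their size, can only collide if they land in the same slot within an alignment block. A simplified model, assuming equal sizes and alignments and non-negative offsets (the helper name is hypothetical):

    #include <cstdint>

    // Alignment-based no-alias proof: compare each access's position inside
    // an aligned block; if the two Size-byte windows inside the block are
    // disjoint, the accesses can never overlap in memory.
    bool provablyDisjointByAlignment(int64_t Off0, int64_t Off1, int64_t Size,
                                     int64_t Alignment) {
      if (Alignment <= Size || Off0 % Size != 0 || Off1 % Size != 0)
        return false; // preconditions mirrored from the hunk above
      int64_t OffAlign0 = Off0 % Alignment; // position inside an aligned block
      int64_t OffAlign1 = Off1 % Alignment;
      return OffAlign0 + Size <= OffAlign1 || OffAlign1 + Size <= OffAlign0;
    }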
+  if (FuncInfo.RegsWithFixups.contains(LoadReg))
+    return false;
+
   MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
   MachineInstr *User = RI->getParent();
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 85c6eca5775e..aa9c77f9cabf 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -31,13 +31,10 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 using namespace llvm;
@@ -57,7 +54,7 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
   return false;
 }

-static ISD::NodeType getPreferredExtendForValue(const Value *V) {
+static ISD::NodeType getPreferredExtendForValue(const Instruction *I) {
   // For the users of the source value being used for compare instruction, if
   // the number of signed predicate is greater than unsigned predicate, we
   // prefer to use SIGN_EXTEND.
@@ -67,7 +64,7 @@ static ISD::NodeType getPreferredExtendForValue(const Value *V) {
   // can be exposed.
   ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
   unsigned NumOfSigned = 0, NumOfUnsigned = 0;
-  for (const User *U : V->users()) {
+  for (const User *U : I->users()) {
     if (const auto *CI = dyn_cast<CmpInst>(U)) {
       NumOfSigned += CI->isSigned();
       NumOfUnsigned += CI->isUnsigned();
@@ -448,9 +445,14 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
   unsigned BitWidth = IntVT.getSizeInBits();

-  Register DestReg = ValueMap[PN];
-  if (!Register::isVirtualRegister(DestReg))
+  auto It = ValueMap.find(PN);
+  if (It == ValueMap.end())
     return;
+
+  Register DestReg = It->second;
+  if (DestReg == 0)
+    return;
+  assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg");
   LiveOutRegInfo.grow(DestReg);
   LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -462,7 +464,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   }

   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    APInt Val;
+    if (TLI->signExtendConstant(CI))
+      Val = CI->getValue().sext(BitWidth);
+    else
+      Val = CI->getValue().zext(BitWidth);
     DestLOI.NumSignBits = Val.getNumSignBits();
     DestLOI.Known = KnownBits::makeConstant(Val);
   } else {
@@ -494,7 +500,11 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   }

   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    APInt Val;
+    if (TLI->signExtendConstant(CI))
+      Val = CI->getValue().sext(BitWidth);
+    else
+      Val = CI->getValue().zext(BitWidth);
     DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
     DestLOI.Known.Zero &= ~Val;
     DestLOI.Known.One &= Val;
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e3e05c868102..3d3b504c6abd 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -14,22 +14,18
@@ #include "InstrEmitter.h" #include "SDNodeDbgValue.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/PseudoProbe.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -321,8 +317,15 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF); if (OpRC) { + unsigned MinNumRegs = MinRCSize; + // Don't apply any RC size limit for IMPLICIT_DEF. Each use has a unique + // virtual register. + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + MinNumRegs = 0; + const TargetRegisterClass *ConstrainedRC - = MRI->constrainRegClass(VReg, OpRC, MinRCSize); + = MRI->constrainRegClass(VReg, OpRC, MinNumRegs); if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 54481b94fdd8..8bdc9410d131 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -45,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include <algorithm> #include <cassert> #include <cstdint> #include <tuple> @@ -142,12 +142,10 @@ private: RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, - RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128, + RTLIB::Libcall Call_IEXT); void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -1000,6 +998,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1036,14 +1035,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: case ISD::SETCC: + case ISD::VP_SETCC: case ISD::BR_CC: { - unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : - Node->getOpcode() == ISD::STRICT_FSETCCS ? 
3 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : - Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : - Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0; + unsigned Opc = Node->getOpcode(); + unsigned CCOperand = Opc == ISD::SELECT_CC ? 4 + : Opc == ISD::STRICT_FSETCC ? 3 + : Opc == ISD::STRICT_FSETCCS ? 3 + : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 + : 1; + unsigned CompareOperand = Opc == ISD::BR_CC ? 2 + : Opc == ISD::STRICT_FSETCC ? 1 + : Opc == ISD::STRICT_FSETCCS ? 1 + : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); @@ -1174,6 +1177,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOpcode(), cast<VPStoreSDNode>(Node)->getValue().getValueType()); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPStridedStoreSDNode>(Node)->getValue().getValueType()); + break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1187,6 +1195,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: + case ISD::IS_FPCLASS: Action = TLI.getOperationAction( Node->getOpcode(), Node->getOperand(0).getValueType()); break; @@ -1212,7 +1221,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { - Action = TargetLowering::Legal; + Action = TLI.getCustomOperationAction(*Node); } else { Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); } @@ -1723,16 +1732,14 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue Chain) { - unsigned SrcSize = SrcOp.getValueSizeInBits(); - unsigned SlotSize = SlotVT.getSizeInBits(); - unsigned DestSize = DestVT.getSizeInBits(); + EVT SrcVT = SrcOp.getValueType(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); Align DestAlign = DAG.getDataLayout().getPrefTypeAlign(DestType); // Don't convert with stack if the load/store is expensive. - if ((SrcSize > SlotSize && + if ((SrcVT.bitsGT(SlotVT) && !TLI.isTruncStoreLegalOrCustom(SrcOp.getValueType(), SlotVT)) || - (SlotSize < DestSize && + (SlotVT.bitsLT(DestVT) && !TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, DestVT, SlotVT))) return SDValue(); @@ -1750,20 +1757,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, // later than DestVT. SDValue Store; - if (SrcSize > SlotSize) + if (SrcVT.bitsGT(SlotVT)) Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SlotVT, SrcAlign); else { - assert(SrcSize == SlotSize && "Invalid store"); - Store = - DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); + assert(SrcVT.bitsEq(SlotVT) && "Invalid store"); + Store = DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. 
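The EmitStackConvert hunk above switches the size comparisons to EVT::bitsGT/bitsEq/bitsLT so that scalable types are compared correctly, but the operation itself is unchanged: store the value in its source type to a stack temporary, then reload it in the destination type. A minimal scalar model of that pattern, with a hypothetical helper name:

    #include <cstdint>
    #include <cstring>

    // Shape of EmitStackConvert: move a value between register classes by
    // storing to a slot in the source type and reloading in the destination
    // type. Here: reinterpret the bits of a double as a 64-bit integer.
    uint64_t bitsOfDouble(double Src) {
      unsigned char Slot[sizeof(double)];   // the "stack temporary"
      std::memcpy(Slot, &Src, sizeof(Src)); // store in the source type
      uint64_t Dst;
      std::memcpy(&Dst, Slot, sizeof(Dst)); // load in the destination type
      return Dst;
    }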
- if (SlotSize == DestSize) + if (SlotVT.bitsEq(DestVT)) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); - assert(SlotSize < DestSize && "Unknown extension!"); + assert(SlotVT.bitsLT(DestVT) && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, DestAlign); } @@ -2101,15 +2107,17 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, ExpandFPLibCall(Node, LC, Results); } -SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, - RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, - RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128) { +SDValue SelectionDAGLegalize::ExpandIntLibCall( + SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + + default: + LC = Call_IEXT; + break; + case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2144,7 +2152,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); + + default: + LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT; + break; + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2893,6 +2905,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getValueType(0), dl))) Results.push_back(Tmp1); break; + case ISD::BF16_TO_FP: { + // Always expand bf16 to f32 casts, they lower to ext + shift. + SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op); + Op = DAG.getNode( + ISD::SHL, dl, MVT::i32, Op, + DAG.getConstant(16, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op); + Results.push_back(Op); + break; + } case ISD::SIGN_EXTEND_INREG: { EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); @@ -2904,7 +2928,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero. // TODO: Do this for vectors too? 
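The ISD::BF16_TO_FP expansion added earlier in this hunk relies on bfloat16 being exactly the top half of an IEEE binary32: the conversion is a bitcast to i16, an extension to i32, a left shift by 16, and a bitcast to f32. The same sequence in scalar form (hypothetical name):

    #include <cstdint>
    #include <cstring>

    // bf16 -> f32: widen the 16 payload bits into the high half of an i32,
    // then reinterpret as float. No rounding is involved in this direction.
    float bf16ToFloat(uint16_t B) {
      uint32_t Wide = uint32_t(B) << 16; // ANY_EXTEND to i32, then SHL by 16
      float F;
      std::memcpy(&F, &Wide, sizeof(F)); // final bitcast i32 -> f32
      return F;
    }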
- if (ExtraVT.getSizeInBits() == 1) { + if (ExtraVT.isScalarInteger() && ExtraVT.getSizeInBits() == 1) { SDValue One = DAG.getConstant(1, dl, VT); SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One); SDValue Zero = DAG.getConstant(0, dl, VT); @@ -3135,6 +3159,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FABS: Results.push_back(ExpandFABS(Node)); break; + case ISD::IS_FPCLASS: { + auto CNode = cast<ConstantSDNode>(Node->getOperand(1)); + auto Test = static_cast<FPClassTest>(CNode->getZExtValue()); + if (SDValue Expanded = + TLI.expandIS_FPCLASS(Node->getValueType(0), Node->getOperand(0), + Test, Node->getFlags(), SDLoc(Node), DAG)) + Results.push_back(Expanded); + break; + } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3577,18 +3610,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::SETCC: + case ISD::VP_SETCC: case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: { - bool IsStrict = Node->getOpcode() != ISD::SETCC; + bool IsVP = Node->getOpcode() == ISD::VP_SETCC; + bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS; bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); unsigned Offset = IsStrict ? 1 : 0; Tmp1 = Node->getOperand(0 + Offset); Tmp2 = Node->getOperand(1 + Offset); Tmp3 = Node->getOperand(2 + Offset); - bool Legalized = - TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, - NeedInvert, dl, Chain, IsSignaling); + SDValue Mask, EVL; + if (IsVP) { + Mask = Node->getOperand(3 + Offset); + EVL = Node->getOperand(4 + Offset); + } + bool Legalized = TLI.LegalizeSetCCCondCode( + DAG, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Mask, EVL, NeedInvert, dl, + Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3598,6 +3639,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(), {Chain, Tmp1, Tmp2, Tmp3}, Node->getFlags()); Chain = Tmp1.getValue(1); + } else if (IsVP) { + Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), + {Tmp1, Tmp2, Tmp3, Mask, EVL}, Node->getFlags()); } else { Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1, Tmp2, Tmp3, Node->getFlags()); @@ -3606,8 +3650,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. - if (NeedInvert) - Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + if (NeedInvert) { + if (!IsVP) + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + else + Tmp1 = + DAG.getVPLogicalNOT(dl, Tmp1, Mask, EVL, Tmp1->getValueType(0)); + } Results.push_back(Tmp1); if (IsStrict) @@ -3622,21 +3671,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. + // FIXME: This drops the mask/evl for VP_SETCC. 
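The one-bit SIGN_EXTEND_INREG expansion above (now guarded to scalar integer types) computes 0 - (x & 1): a stored low bit of 1 becomes all-ones and 0 stays 0, which is exactly sign extension from i1. In scalar form, with a hypothetical name:

    #include <cstdint>

    // Sign-extend from bit 0 without shifts: isolate the bit with AND, then
    // subtract it from zero so 1 -> -1 (all-ones) and 0 -> 0.
    int32_t signExtendFromBit0(int32_t X) {
      int32_t And = X & 1; // the AND with the constant 1 from the hunk
      return 0 - And;      // the Zero - And node from the hunk
    }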
EVT VT = Node->getValueType(0); - int TrueValue; - switch (TLI.getBooleanContents(Tmp1.getValueType())) { - case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - TrueValue = 1; - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = -1; - break; - } + EVT Tmp1VT = Tmp1.getValueType(); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(TrueValue, dl, VT), - DAG.getConstant(0, dl, VT), - Tmp3); + DAG.getBoolConstant(true, dl, VT, Tmp1VT), + DAG.getBoolConstant(false, dl, VT, Tmp1VT), Tmp3); Tmp1->setFlags(Node->getFlags()); Results.push_back(Tmp1); break; @@ -3692,7 +3732,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (!Legalized) { Legalized = TLI.LegalizeSetCCCondCode( DAG, getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, - NeedInvert, dl, Chain); + /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3725,9 +3765,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - bool Legalized = - TLI.LegalizeSetCCCondCode(DAG, getSetCCResultType(Tmp2.getValueType()), - Tmp2, Tmp3, Tmp4, NeedInvert, dl, Chain); + bool Legalized = TLI.LegalizeSetCCCondCode( + DAG, getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, + /*Mask*/ SDValue(), /*EVL*/ SDValue(), NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -4068,12 +4108,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi."); if (!TLI.getLibcallName(LC)) { // Some targets don't have a powi libcall; use pow instead. - SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), - Node->getValueType(0), - Node->getOperand(1)); - Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), - Node->getValueType(0), Node->getOperand(0), - Exponent)); + if (Node->isStrictFPOpcode()) { + SDValue Exponent = + DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(Node), + {Node->getValueType(0), Node->getValueType(1)}, + {Node->getOperand(0), Node->getOperand(2)}); + SDValue FPOW = + DAG.getNode(ISD::STRICT_FPOW, SDLoc(Node), + {Node->getValueType(0), Node->getValueType(1)}, + {Exponent.getValue(1), Node->getOperand(1), Exponent}); + Results.push_back(FPOW); + Results.push_back(FPOW.getValue(1)); + } else { + SDValue Exponent = + DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), Node->getValueType(0), + Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), + Node->getValueType(0), + Node->getOperand(0), Exponent)); + } break; } unsigned Offset = Node->isStrictFPOpcode() ? 
1 : 0; @@ -4176,6 +4229,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(LC, Node, false)); break; } + case ISD::FP_TO_BF16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: @@ -4315,28 +4375,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: - Results.push_back(ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128)); + Results.push_back(ExpandIntLibCall( + Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT)); break; case ISD::UREM: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT)); break; case ISD::SDIV: - Results.push_back(ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + Results.push_back(ExpandIntLibCall( + Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT)); break; case ISD::UDIV: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT)); break; case ISD::SDIVREM: case ISD::UDIVREM: @@ -4344,10 +4400,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandDivRemLibCall(Node, Results); break; case ISD::MUL: - Results.push_back(ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128)); + Results.push_back(ExpandIntLibCall( + Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT)); break; case ISD::CTLZ_ZERO_UNDEF: switch (Node->getSimpleValueType(0).SimpleTy) { @@ -4700,6 +4755,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FMINNUM: + case ISD::STRICT_FMAXNUM: case ISD::STRICT_FREM: case ISD::STRICT_FPOW: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, @@ -4724,6 +4785,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FMA: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(3)}); + Tmp4 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1), 
Tmp3.getValue(1)); + Tmp4 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp4, Tmp1, Tmp2, Tmp3}); + Tmp4 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp4.getValue(1), Tmp4, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp4); + Results.push_back(Tmp4.getValue(1)); + break; case ISD::FCOPYSIGN: case ISD::FPOWI: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); @@ -4740,6 +4817,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } + case ISD::STRICT_FPOWI: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: @@ -4764,12 +4851,19 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FROUND: + case ISD::STRICT_FROUNDEVEN: + case ISD::STRICT_FTRUNC: + case ISD::STRICT_FSQRT: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG2: case ISD::STRICT_FLOG10: case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, {Node->getOperand(0), Node->getOperand(1)}); Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6bf38d7296a8..f464208cd9dc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -273,6 +273,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG)) + return SoftenFloatRes_SELECT_CC(SelCC.getNode()); return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, @@ -282,6 +284,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG)) + return SoftenFloatRes_SELECT_CC(SelCC.getNode()); return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, @@ -830,6 +834,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::STRICT_FP_TO_FP16: case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::FP_TO_BF16: case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::STRICT_FP_TO_SINT: @@ -881,16 +886,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. 
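The SoftenFloatRes_FMINNUM/FMAXNUM changes earlier in this diff first try TLI.createSelectForFMINNUM_FMAXNUM, which turns the operation into a compare plus select when NaN ordering may be ignored, falling back to the libcall otherwise. A sketch of the shape that select takes, valid only under a no-NaNs assumption (hypothetical name):

    // fminnum(a, b) as SELECT_CC with SETLT: (a < b) ? a : b. Without the
    // no-NaNs assumption this is NOT equivalent, because fminnum must return
    // the non-NaN operand when exactly one input is NaN, while this select
    // would return b whenever the comparison is false.
    double fminnumNoNaNs(double A, double B) {
      return (A < B) ? A : B;
    }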
 assert(N->getOpcode() == ISD::FP_ROUND ||
          N->getOpcode() == ISD::FP_TO_FP16 ||
          N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
+         N->getOpcode() == ISD::FP_TO_BF16 ||
          N->getOpcode() == ISD::STRICT_FP_ROUND);

   bool IsStrict = N->isStrictFPOpcode();
   SDValue Op = N->getOperand(IsStrict ? 1 : 0);
   EVT SVT = Op.getValueType();
   EVT RVT = N->getValueType(0);
-  EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
-                  N->getOpcode() == ISD::STRICT_FP_TO_FP16)
-                     ? MVT::f16
-                     : RVT;
+  EVT FloatRVT = RVT;
+  if (N->getOpcode() == ISD::FP_TO_FP16 ||
+      N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+    FloatRVT = MVT::f16;
+  else if (N->getOpcode() == ISD::FP_TO_BF16)
+    FloatRVT = MVT::bf16;

   RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
@@ -2064,9 +2072,13 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {

 static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
   if (OpVT == MVT::f16) {
-    return ISD::FP16_TO_FP;
+    return ISD::FP16_TO_FP;
   } else if (RetVT == MVT::f16) {
-    return ISD::FP_TO_FP16;
+    return ISD::FP_TO_FP16;
+  } else if (OpVT == MVT::bf16) {
+    return ISD::BF16_TO_FP;
+  } else if (RetVT == MVT::bf16) {
+    return ISD::FP_TO_BF16;
   }

   report_fatal_error("Attempt at an invalid promotion-related conversion");
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8c7b90b6cd33..69fd83bcd7b3 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -78,6 +78,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::SELECT:
   case ISD::VSELECT:
   case ISD::VP_SELECT:
+  case ISD::VP_MERGE:
     Res = PromoteIntRes_Select(N);
     break;
   case ISD::SELECT_CC:    Res = PromoteIntRes_SELECT_CC(N); break;
@@ -97,6 +98,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_ASHR:      Res = PromoteIntRes_SRA(N); break;
   case ISD::SRL:
   case ISD::VP_LSHR:      Res = PromoteIntRes_SRL(N); break;
+  case ISD::VP_TRUNCATE:
   case ISD::TRUNCATE:     Res = PromoteIntRes_TRUNCATE(N); break;
   case ISD::UNDEF:        Res = PromoteIntRes_UNDEF(N); break;
   case ISD::VAARG:        Res = PromoteIntRes_VAARG(N); break;
@@ -115,11 +117,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::INSERT_VECTOR_ELT:
     Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
   case ISD::BUILD_VECTOR:
-    Res = PromoteIntRes_BUILD_VECTOR(N); break;
-  case ISD::SCALAR_TO_VECTOR:
-    Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+    Res = PromoteIntRes_BUILD_VECTOR(N);
+    break;
   case ISD::SPLAT_VECTOR:
-    Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+  case ISD::SCALAR_TO_VECTOR:
+    Res = PromoteIntRes_ScalarOp(N);
+    break;
   case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
   case ISD::CONCAT_VECTORS:
     Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -133,6 +136,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;

+  case ISD::VP_FPTOSI:
+  case ISD::VP_FPTOUI:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
@@ -262,6 +267,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::FSHR:
     Res = PromoteIntRes_FunnelShift(N);
     break;
+
+  case ISD::IS_FPCLASS:
+    Res = PromoteIntRes_IS_FPCLASS(N);
+    break;
   }

   // If the result is
null then the sub-method took care of registering it. @@ -435,10 +444,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // interesting bits will end up at the wrong place. if (DAG.getDataLayout().isBigEndian()) { unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout()); assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!"); Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res, - DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + DAG.getShiftAmountConstant(ShiftAmt, NOutVT, dl)); } return Res; } @@ -446,13 +454,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // as the widened input type would be a legal type, we can widen the bitcast // and handle the promotion after. if (NOutVT.isVector()) { - unsigned WidenInSize = NInVT.getSizeInBits(); - unsigned OutSize = OutVT.getSizeInBits(); - if (WidenInSize % OutSize == 0) { - unsigned Scale = WidenInSize / OutSize; - EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(), - OutVT.getVectorElementType(), - OutVT.getVectorNumElements() * Scale); + TypeSize WidenInSize = NInVT.getSizeInBits(); + TypeSize OutSize = OutVT.getSizeInBits(); + if (WidenInSize.hasKnownScalarFactor(OutSize)) { + unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize); + EVT WideOutVT = + EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(), + OutVT.getVectorElementCount() * Scale); if (isTypeLegal(WideOutVT)) { InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, @@ -490,9 +498,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, ShiftVT)); + DAG.getShiftAmountConstant(DiffBits, NVT, dl)); } SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { @@ -512,10 +519,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, ShiftVT)); + DAG.getShiftAmountConstant(DiffBits, NVT, dl)); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -666,6 +672,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) NewOpc = ISD::STRICT_FP_TO_SINT; + if (N->getOpcode() == ISD::VP_FPTOUI && + !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) && + TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT)) + NewOpc = ISD::VP_FPTOSI; + SDValue Res; if (N->isStrictFPOpcode()) { Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, @@ -673,8 +684,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else + } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) { + Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1), + N->getOperand(2)}); + } else { Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + } // Assert that the converted value fits in the original type. 
If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -684,8 +699,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || - N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? - ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + N->getOpcode() == ISD::STRICT_FP_TO_UINT || + N->getOpcode() == ISD::VP_FPTOUI) + ? ISD::AssertZext + : ISD::AssertSext, + dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -889,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { } unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + SDValue ShiftAmount = + DAG.getShiftAmountConstant(SHLAmount, PromotedType, dl); Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); if (!IsShift) @@ -939,14 +957,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { // which is extends the values that we clamp to on saturation. This could be // resolved by shifting one of the operands the same amount, which would // also shift the result we compare against, then shifting back. - EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, - DAG.getConstant(DiffSize, dl, ShiftTy)); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getShiftAmountConstant(DiffSize, PromotedType, dl)); SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL; return DAG.getNode(ShiftOp, dl, PromotedType, Result, - DAG.getConstant(DiffSize, dl, ShiftTy)); + DAG.getShiftAmountConstant(DiffSize, PromotedType, dl)); } return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); @@ -1043,17 +1061,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) { - EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); unsigned Diff = PromotedType.getScalarSizeInBits() - N->getValueType(0).getScalarSizeInBits(); if (Saturating) - Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, - DAG.getConstant(Diff, dl, ShiftTy)); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getShiftAmountConstant(Diff, PromotedType, dl)); SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted, N->getOperand(2)); if (Saturating) Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res, - DAG.getConstant(Diff, dl, ShiftTy)); + DAG.getShiftAmountConstant(Diff, PromotedType, dl)); return Res; } } @@ -1110,11 +1128,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Select(SDNode *N) { SDValue RHS = GetPromotedInteger(N->getOperand(2)); unsigned Opcode = N->getOpcode(); - return Opcode == ISD::VP_SELECT - ? 
DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS, - N->getOperand(3)) - : DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, - RHS); + if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) + return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS, + N->getOperand(3)); + return DAG.getNode(Opcode, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { @@ -1167,6 +1184,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { return DAG.getSExtOrTrunc(SetCC, dl, NVT); } +SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + SDValue Arg = N->getOperand(0); + SDValue Test = N->getOperand(1); + EVT NResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); @@ -1265,7 +1290,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { SDValue Hi = GetPromotedInteger(N->getOperand(0)); SDValue Lo = GetPromotedInteger(N->getOperand(1)); - SDValue Amt = GetPromotedInteger(N->getOperand(2)); + SDValue Amt = N->getOperand(2); + if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) + Amt = ZExtPromotedInteger(Amt); + EVT AmtVT = Amt.getValueType(); SDLoc DL(N); EVT OldVT = N->getOperand(0).getValueType(); @@ -1276,7 +1304,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { unsigned NewBits = VT.getScalarSizeInBits(); // Amount has to be interpreted modulo the old bit width. - Amt = DAG.getNode(ISD::UREM, DL, VT, Amt, DAG.getConstant(OldBits, DL, VT)); + Amt = DAG.getNode(ISD::UREM, DL, AmtVT, Amt, + DAG.getConstant(OldBits, DL, AmtVT)); // If the promoted type is twice the size (or more), then we use the // traditional funnel 'double' shift codegen. This isn't necessary if the @@ -1296,13 +1325,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { } // Shift Lo up to occupy the upper bits of the promoted type. - SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, VT); + SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT); Lo = DAG.getNode(ISD::SHL, DL, VT, Lo, ShiftOffset); // Increase Amount to shift the result into the lower bits of the promoted // type. 
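// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The promoted funnel shift above, on one scalar: once the promoted type is
// at least twice the original width, an i8 FSHR is just an ordinary shift of
// the 16-bit concatenation, with the amount reduced modulo the *old* bit
// width (the UREM emitted above). fshr8 is a hypothetical helper name.
#include <cassert>
#include <cstdint>

static uint8_t fshr8(uint8_t Hi, uint8_t Lo, unsigned Amt) {
  Amt %= 8;                                           // amount is modulo OldBits
  uint16_t Concat = uint16_t(unsigned(Hi) << 8 | Lo); // Hi:Lo, Lo in the low bits
  return uint8_t(Concat >> Amt);
}

int main() {
  assert(fshr8(0xAB, 0xCD, 4) == 0xBC);  // low nibble of Hi : high nibble of Lo
  assert(fshr8(0xAB, 0xCD, 12) == 0xBC); // 12 behaves as 12 % 8 == 4
  assert(fshr8(0xAB, 0xCD, 0) == 0xCD);  // amount 0 returns Lo unchanged
}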
if (IsFSHR) - Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, ShiftOffset); + Amt = DAG.getNode(ISD::ADD, DL, AmtVT, Amt, ShiftOffset); return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt); } @@ -1336,11 +1365,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), NumElts.divideCoefficientBy(2)); - EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); - EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); - + if (N->getOpcode() == ISD::TRUNCATE) { + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + } else { + assert(N->getOpcode() == ISD::VP_TRUNCATE && + "Expected VP_TRUNCATE opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo); + EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi); + } return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); } + // TODO: VP_TRUNCATE need to handle when TypeWidenVector access to some + // targets. case TargetLowering::TypeWidenVector: { SDValue WideInOp = GetWidenedVector(InOp); @@ -1362,6 +1403,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { } // Truncate to NVT instead of VT + if (N->getOpcode() == ISD::VP_TRUNCATE) + return DAG.getNode(ISD::VP_TRUNCATE, dl, NVT, Res, N->getOperand(1), + N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); } @@ -1432,6 +1476,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N, } SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + + // If a larger ABS or SMAX isn't supported by the target, try to expand now. + // If we expand later we'll end up sign extending more than just the sra input + // in sra+xor+sub expansion. + if (!OVT.isVector() && + !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) && + !TLI.isOperationLegal(ISD::SMAX, NVT)) { + if (SDValue Res = TLI.expandABS(N, DAG)) + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res); + } + SDValue Op0 = SExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0); } @@ -1466,9 +1523,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. 
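// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The promoted UMULO check in scalar form: multiply the zero-extended
// operands in the wider type; unsigned overflow at the original width
// happened exactly when the high half of the product is non-zero, which is
// what the SRL + SETNE above test. umulo16 is a hypothetical helper name.
#include <cassert>
#include <cstdint>

static bool umulo16(uint16_t A, uint16_t B, uint16_t &Res) {
  uint32_t Mul = uint32_t(A) * uint32_t(B); // multiply in the promoted type
  Res = uint16_t(Mul);
  return (Mul >> 16) != 0;                  // overflow iff the high part is non-zero
}

int main() {
  uint16_t R;
  assert(!umulo16(255, 255, R) && R == 65025); // 65025 still fits in 16 bits
  assert(umulo16(256, 256, R));                // 65536 does not fit
}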
unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout()); - SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, - DAG.getConstant(Shift, DL, ShiftTy)); + SDValue Hi = + DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getShiftAmountConstant(Shift, Mul.getValueType(), DL)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, DAG.getConstant(0, DL, Hi.getValueType()), ISD::SETNE); @@ -1498,7 +1555,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits())); + return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits())); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { @@ -1578,16 +1635,19 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; case ISD::INSERT_VECTOR_ELT: - Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; - case ISD::SCALAR_TO_VECTOR: - Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); + break; case ISD::SPLAT_VECTOR: - Res = PromoteIntOp_SPLAT_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_ScalarOp(N); + break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::VP_SITOFP: case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), @@ -1600,8 +1660,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: + case ISD::VP_UITOFP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -1614,6 +1676,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + case ISD::FSHL: + case ISD::FSHR: Res = PromoteIntOp_FunnelShift(N); break; + case ISD::SADDO_CARRY: case ISD::SSUBO_CARRY: case ISD::ADDCARRY: @@ -1848,20 +1913,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, N->getOperand(1), Idx), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { - // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote - // the operand in place. +SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) { + // Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated, + // so just promote the operand in place. 
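// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// Why promoting the scalar operand in place is safe here: each lane of a
// SPLAT_VECTOR/SCALAR_TO_VECTOR keeps only the low element-type bits, so any
// two any-extensions of the same value splat identically.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t PromotedA = 0xFFFFFF80u; // one possible any-extend of the i8 0x80
  uint32_t PromotedB = 0x00000080u; // another any-extend of the same i8
  // Implicit per-lane truncation makes the high bits irrelevant.
  assert(uint8_t(PromotedA) == uint8_t(PromotedB));
  assert(uint8_t(PromotedA) == 0x80);
}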
return SDValue(DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { - // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the - // operand in place. - return SDValue( - DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1900,7 +1958,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); // The CC (#2) is always legal. - return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); + if (N->getOpcode() == ISD::SETCC) + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); + + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { @@ -1908,6 +1973,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { ZExtPromotedInteger(N->getOperand(1))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_FunnelShift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), + ZExtPromotedInteger(N->getOperand(2))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); SDLoc dl(N); @@ -1917,6 +1987,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_SITOFP) + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, SExtPromotedInteger(N->getOperand(0))), 0); } @@ -1980,8 +2055,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo) { - SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { // The Mask EVT DataVT = N->getValueType(0); @@ -2010,6 +2085,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo) { bool TruncateStore = N->isTruncatingStore(); SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { // The Mask EVT DataVT = N->getValue().getValueType(); @@ -2021,9 +2097,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); else NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); - - N->setIndexType(TLI.getCanonicalIndexType(N->getIndexType(), - N->getMemoryVT(), NewOps[OpNo])); } else { NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); TruncateStore = true; @@ -2036,10 +2109,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); + if (N->getOpcode() == ISD::VP_TRUNCATE) + return DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op, + N->getOperand(1), N->getOperand(2)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + if (N->getOpcode() == ISD::VP_UITOFP) 
+ return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0)), + N->getOperand(1), N->getOperand(2)), + 0); return SDValue(DAG.UpdateNodeOperands(N, ZExtPromotedInteger(N->getOperand(0))), 0); } @@ -2468,7 +2549,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getConstant(0, DL, NVT); @@ -2489,7 +2570,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } if (N->getOpcode() == ISD::SRL) { - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRL, DL, @@ -2510,7 +2591,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); - if (Amt.ugt(VTBits)) { + if (Amt.uge(VTBits)) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt.ugt(NVTBits)) { @@ -3132,24 +3213,23 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N0, Lo, Hi); EVT NVT = Lo.getValueType(); - // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we - // use in LegalizeDAG. The ADD part of the expansion is based on - // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that - // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we + // use in LegalizeDAG. The SUB part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that + // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded // if needed. Shift expansion has a special case for filling with sign bits // so that we will only end up with one SRA. 
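// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The sra+xor+sub form this hunk expands across the Lo/Hi pair, shown on a
// single 64-bit value: with Sign = X >> 63 (all-ones or zero), abs(X) is
// (X ^ Sign) - Sign, and the subtract is what the USUBO/SUBCARRY chain below
// performs limb by limb. abs64 is a hypothetical helper name.
#include <cassert>
#include <cstdint>

static uint64_t abs64(int64_t X) {
  uint64_t Sign = uint64_t(X >> 63);  // arithmetic shift: all-ones if negative
  return (uint64_t(X) ^ Sign) - Sign; // flip the bits, then add one, iff negative
}

int main() {
  assert(abs64(-5) == 5 && abs64(7) == 7);
  assert(abs64(INT64_MIN) == (1ull << 63)); // wraps, matching ISD::ABS semantics
}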
- bool HasAddCarry = TLI.isOperationLegalOrCustom( - ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); - if (HasAddCarry) { - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - SDValue Sign = - DAG.getNode(ISD::SRA, dl, NVT, Hi, - DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + bool HasSubCarry = TLI.isOperationLegalOrCustom( + ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasSubCarry) { + SDValue Sign = DAG.getNode( + ISD::SRA, dl, NVT, Hi, + DAG.getShiftAmountConstant(NVT.getSizeInBits() - 1, NVT, dl)); SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); return; } @@ -3160,8 +3240,8 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), - DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); + SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, + DAG.getConstant(0, dl, NVT), ISD::SETLT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi); } @@ -3223,12 +3303,11 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); SDValue Chain = Lo.getValue(1); // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); + DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl)); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -3535,8 +3614,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); - EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); + SDValue Shift = DAG.getShiftAmountConstant(HalfBits, NVT, dl); SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); @@ -3667,7 +3745,6 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, unsigned NVTSize = NVT.getScalarSizeInBits(); assert((VTSize == NVTSize * 2) && "Expected the new value type to be half " "the size of the current value type"); - EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. @@ -3690,7 +3767,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // shifting. uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. 
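// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// What the per-part FSHR re-scaling below computes, on one scalar: a
// fixed-point multiply with scale S takes the full double-width product and
// shifts it right by S. smulfix is a hypothetical helper name; Q16.16 means
// 16 integer and 16 fractional bits.
#include <cassert>
#include <cstdint>

static int32_t smulfix(int32_t A, int32_t B, unsigned Scale) {
  int64_t Prod = int64_t(A) * int64_t(B); // full 2N-bit product (the 4 parts above)
  return int32_t(Prod >> Scale);          // drop Scale fractional bits
}

int main() {
  int32_t A = 3 << 15;                     // 1.5  in Q16.16
  int32_t B = 9 << 14;                     // 2.25 in Q16.16
  assert(smulfix(A, B, 16) == (27 << 13)); // 3.375 in Q16.16
}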
if (Scale % NVTSize) { - SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + SDValue ShiftAmount = DAG.getShiftAmountConstant(Scale % NVTSize, NVT, dl); Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], ShiftAmount); Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], @@ -3731,8 +3808,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, if (!Signed) { if (Scale < NVTSize) { // Overflow happened if ((HH | (HL >> Scale)) != 0). - SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, - DAG.getConstant(Scale, dl, ShiftTy)); + SDValue HLAdjusted = + DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getShiftAmountConstant(Scale, NVT, dl)); SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); } else if (Scale == NVTSize) { @@ -3740,9 +3818,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); } else if (Scale < VTSize) { // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). - SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, - DAG.getConstant(Scale - NVTSize, dl, - ShiftTy)); + SDValue HLAdjusted = + DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getShiftAmountConstant(Scale - NVTSize, NVT, dl)); SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE); } else llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT" @@ -3901,6 +3979,70 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, ReplaceValueWith(SDValue(Node, 1), Ovf); } +// Emit a call to __udivei4 and friends which require +// the arguments be based on the stack +// and extra argument that contains the number of bits of the operands. +// Returns the result of the call operation. 
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI, + const RTLIB::Libcall &LC, + SelectionDAG &DAG, SDNode *N, + const SDLoc &DL, const EVT &VT) { + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // The signature of __udivei4 is + // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b, + // unsigned int bits) + EVT ArgVT = N->op_begin()->getValueType(); + assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 && + "Unexpected argument type for lowering"); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + SDValue Output = DAG.CreateStackTemporary(ArgVT); + Entry.Node = Output; + Entry.Ty = ArgTy->getPointerTo(); + Entry.IsSExt = false; + Entry.IsZExt = false; + Args.push_back(Entry); + + for (const llvm::SDUse &Op : N->ops()) { + SDValue StackPtr = DAG.CreateStackTemporary(ArgVT); + InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo()); + Entry.Node = StackPtr; + Entry.Ty = ArgTy->getPointerTo(); + Entry.IsSExt = false; + Entry.IsZExt = false; + Args.push_back(Entry); + } + + int Bits = N->getOperand(0) + .getValueType() + .getTypeForEVT(*DAG.getContext()) + ->getIntegerBitWidth(); + Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout())); + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.IsSExt = false; + Entry.IsZExt = true; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(InChain) + .setLibCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)) + .setDiscardResult(); + + SDValue Chain = TLI.LowerCallTo(CLI).second; + + return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo()); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -3922,6 +4064,14 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I64; else if (VT == MVT::i128) LC = RTLIB::SDIV_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4113,6 +4263,14 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I64; else if (VT == MVT::i128) LC = RTLIB::SREM_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4288,6 +4446,14 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I64; else if (VT == MVT::i128) LC = RTLIB::UDIV_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4315,6 +4481,14 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I64; else if (VT == MVT::i128) LC = RTLIB::UREM_I128; + + else { + SDValue Result = + ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT); + SplitInteger(Result, Lo, Hi); + return; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); 
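// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The call shape ExpandExtIntRes_DIVREM emits, seen from the callee: the
// quotient and both operands travel through stack slots, and the bit width
// rides along as a trailing argument. toy_udivei4 is a stand-in (simple
// restoring division over 32-bit little-endian limbs), not compiler-rt's
// implementation; the limb layout is an assumption for illustration.
#include <cassert>
#include <cstdint>
#include <vector>

static void toy_udivei4(uint32_t *Quo, const uint32_t *A, const uint32_t *B,
                        unsigned Bits) {
  const unsigned Words = Bits / 32;
  std::vector<uint32_t> Rem(Words, 0);
  for (unsigned I = 0; I < Words; ++I)
    Quo[I] = 0;
  for (int Bit = int(Bits) - 1; Bit >= 0; --Bit) {
    uint32_t Carry = (A[Bit / 32] >> (Bit % 32)) & 1; // next dividend bit
    for (unsigned I = 0; I < Words; ++I) {            // Rem = (Rem << 1) | bit
      uint32_t Top = Rem[I] >> 31;
      Rem[I] = (Rem[I] << 1) | Carry;
      Carry = Top;
    }
    bool GE = true; // does Rem >= B hold?
    for (int I = int(Words) - 1; I >= 0; --I)
      if (Rem[I] != B[I]) {
        GE = Rem[I] > B[I];
        break;
      }
    if (GE) { // Rem -= B and record a one bit in the quotient
      uint64_t Borrow = 0;
      for (unsigned I = 0; I < Words; ++I) {
        uint64_t D = uint64_t(Rem[I]) - B[I] - Borrow;
        Rem[I] = uint32_t(D);
        Borrow = (D >> 32) & 1;
      }
      Quo[Bit / 32] |= 1u << (Bit % 32);
    }
  }
}

int main() {
  uint32_t A[5] = {0, 0, 0xC0, 0, 0}; // 3 << 70, a 160-bit dividend
  uint32_t B[5] = {3, 0, 0, 0, 0};
  uint32_t Q[5];
  toy_udivei4(Q, A, B, 160); // widths > 128 take the new libcall path
  assert(Q[2] == 0x40 && !Q[0] && !Q[1] && !Q[3] && !Q[4]); // result: 1 << 70
}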
TargetLowering::MakeLibCallOptions CallOptions; @@ -5060,7 +5234,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { return DAG.getBuildVector(NOutVT, dl, Ops); } -SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) { SDLoc dl(N); @@ -5070,35 +5244,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - EVT NOutVTElem = NOutVT.getVectorElementType(); - - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); - - return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); -} - -SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { - SDLoc dl(N); - - SDValue SplatVal = N->getOperand(0); - - assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); - - EVT OutVT = N->getValueType(0); - EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - assert(NOutVT.isVector() && "Type must be promoted to a vector type"); EVT NOutElemVT = NOutVT.getVectorElementType(); - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0)); - return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); + return DAG.getNode(N->getOpcode(), dl, NOutVT, Op); } SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) { SDLoc dl(N); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + assert(NOutVT.isScalableVector() && + "Type must be promoted to a scalable vector type"); APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); return DAG.getStepVector(dl, NOutVT, StepVal.sext(NOutVT.getScalarSizeInBits())); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 03dcd0f6d2c9..8fe9a83b9c3d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -13,10 +13,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "SDNodeDbgValue.h" #include "llvm/ADT/SetVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -86,46 +83,49 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { auto ResId = ValueToIdMap.lookup(Res); unsigned Mapped = 0; - if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { - Mapped |= 1; - // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); - UI != UE; ++UI) - if (UI.getUse().getResNo() == i) - assert(UI->getNodeId() == NewNode && - "Remapped value has non-trivial use!"); - - // Check that the final result of applying ReplacedValues is not - // marked NewNode. 
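// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The loop being restructured here just chases ReplacedValues to a fixed
// point, the way one walks a parent chain in union-find:
#include <cassert>
#include <map>

int main() {
  std::map<int, int> ReplacedValues = {{1, 2}, {2, 5}, {5, 9}};
  int Id = 1;
  auto It = ReplacedValues.find(Id);
  while (It != ReplacedValues.end()) { // follow remappings until none remain
    Id = It->second;
    It = ReplacedValues.find(Id);
  }
  assert(Id == 9); // the final, fully-remapped value id
}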
- auto NewValId = ReplacedValues[ResId]; - auto I = ReplacedValues.find(NewValId); - while (I != ReplacedValues.end()) { - NewValId = I->second; + if (ResId) { + auto I = ReplacedValues.find(ResId); + if (I != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. + auto NewValId = I->second; I = ReplacedValues.find(NewValId); + while (I != ReplacedValues.end()) { + NewValId = I->second; + I = ReplacedValues.find(NewValId); + } + SDValue NewVal = getSDValue(NewValId); + (void)NewVal; + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); } - SDValue NewVal = getSDValue(NewValId); - (void)NewVal; - assert(NewVal.getNode()->getNodeId() != NewNode && - "ReplacedValues maps to a new node!"); + if (PromotedIntegers.count(ResId)) + Mapped |= 2; + if (SoftenedFloats.count(ResId)) + Mapped |= 4; + if (ScalarizedVectors.count(ResId)) + Mapped |= 8; + if (ExpandedIntegers.count(ResId)) + Mapped |= 16; + if (ExpandedFloats.count(ResId)) + Mapped |= 32; + if (SplitVectors.count(ResId)) + Mapped |= 64; + if (WidenedVectors.count(ResId)) + Mapped |= 128; + if (PromotedFloats.count(ResId)) + Mapped |= 256; + if (SoftPromotedHalfs.count(ResId)) + Mapped |= 512; } - if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end()) - Mapped |= 2; - if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end()) - Mapped |= 4; - if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end()) - Mapped |= 8; - if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end()) - Mapped |= 16; - if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end()) - Mapped |= 32; - if (ResId && SplitVectors.find(ResId) != SplitVectors.end()) - Mapped |= 64; - if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end()) - Mapped |= 128; - if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) - Mapped |= 256; - if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end()) - Mapped |= 512; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -143,8 +143,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { } } else { if (Mapped == 0) { - dbgs() << "Processed value not in any map!"; - Failed = true; + SDValue NodeById = IdToValueMap.lookup(ResId); + // It is possible the node has been remapped to another node and had + // its Id updated in the Value to Id table. The node it remapped to + // may not have been processed yet. Look up the Id in the Id to Value + // table and re-check the Processed state. If the node hasn't been + // remapped we'll get the same state as we got earlier. 
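// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// A note on the Mapped bitmask used throughout this check: each legalization
// map owns one bit (2 = PromotedIntegers, 16 = ExpandedIntegers, 128 =
// WidenedVectors, ...), so the "Value in multiple maps!" test just below is
// the classic more-than-one-bit-set trick.
#include <cassert>

int main() {
  auto InMultipleMaps = [](unsigned Mapped) {
    return (Mapped & (Mapped - 1)) != 0; // clearing the lowest set bit leaves something
  };
  assert(!InMultipleMaps(16u));      // ExpandedIntegers only: fine
  assert(InMultipleMaps(2u | 128u)); // promoted *and* widened: inconsistent
}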
+ if (NodeById->getNodeId() == Processed) { + dbgs() << "Processed value not in any map!"; + Failed = true; + } } else if (Mapped & (Mapped - 1)) { dbgs() << "Value in multiple maps!"; Failed = true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4d8daa82d8c0..de320290bda9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -19,7 +19,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" namespace llvm { @@ -309,8 +308,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); - SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntRes_ScalarOp(SDNode *N); SDValue PromoteIntRes_STEP_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); @@ -362,6 +360,7 @@ private: SDValue PromoteIntRes_ABS(SDNode *N); SDValue PromoteIntRes_Rotate(SDNode *N); SDValue PromoteIntRes_FunnelShift(SDNode *N); + SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); @@ -377,12 +376,12 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); - SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); - SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); + SDValue PromoteIntOp_ScalarOp(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_FunnelShift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N); @@ -784,6 +783,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); SDValue ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N); + SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); @@ -850,6 +850,7 @@ private: void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); @@ -960,6 +961,7 @@ private: SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); + SDValue WidenVecRes_IS_FPCLASS(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); @@ -985,6 +987,7 @@ private: SDValue WidenVecOp_Convert(SDNode *N); SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N); SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + SDValue WidenVecOp_IS_FPCLASS(SDNode *N); SDValue WidenVecOp_VECREDUCE(SDNode *N); SDValue 
WidenVecOp_VECREDUCE_SEQ(SDNode *N); SDValue WidenVecOp_VP_REDUCE(SDNode *N); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index abf6a3ac6916..842ffa2aa23e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -26,11 +26,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -41,7 +39,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" -#include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> #include <iterator> @@ -464,6 +461,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VPID: { \ EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ : Node->getOperand(LEGALPOS).getValueType(); \ + if (ISD::VPID == ISD::VP_SETCC) { \ + ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ + Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ + if (Action != TargetLowering::Legal) \ + break; \ + } \ Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ } break; #include "llvm/IR/VPIntrinsics.def" @@ -747,6 +750,7 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandFSUB(Node, Results); return; case ISD::SETCC: + case ISD::VP_SETCC: ExpandSETCC(Node, Results); return; case ISD::ABS: @@ -1050,10 +1054,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { // Shuffle the incoming lanes into the correct position, and pull all other // lanes from the zero vector. - SmallVector<int, 16> ShuffleMask; - ShuffleMask.reserve(NumSrcElements); - for (int i = 0; i < NumSrcElements; ++i) - ShuffleMask.push_back(i); + auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements)); int ExtLaneScale = NumSrcElements / NumElements; int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; @@ -1423,6 +1424,7 @@ void VectorLegalizer::ExpandFSUB(SDNode *Node, void VectorLegalizer::ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results) { bool NeedInvert = false; + bool IsVP = Node->getOpcode() == ISD::VP_SETCC; SDLoc dl(Node); MVT OpVT = Node->getOperand(0).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); @@ -1436,20 +1438,36 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node, SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue CC = Node->getOperand(2); - bool Legalized = TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, - RHS, CC, NeedInvert, dl, Chain); + SDValue Mask, EVL; + if (IsVP) { + Mask = Node->getOperand(3); + EVL = Node->getOperand(4); + } + + bool Legalized = + TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask, + EVL, NeedInvert, dl, Chain); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the // condition code, create a new SETCC node. 
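// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// LegalizeSetCCCondCode's two escape hatches, in scalar terms: an
// unsupported predicate can become a supported one with swapped operands, or
// the inverse predicate followed by a NOT. The NeedInvert path handled below
// is the second case; for VP nodes the NOT becomes a masked VP logical NOT.
#include <cassert>

int main() {
  int A = 3, B = 7;
  assert((A > B) == (B < A));    // swap LHS and RHS, turn GT into LT
  assert((A != B) == !(A == B)); // invert the condition code, then NOT
}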
- if (CC.getNode()) - LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, - Node->getFlags()); + if (CC.getNode()) { + if (!IsVP) + LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, + Node->getFlags()); + else + LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0), + {LHS, RHS, CC, Mask, EVL}, Node->getFlags()); + } // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. - if (NeedInvert) - LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); + if (NeedInvert) { + if (!IsVP) + LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); + else + LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0)); + } } else { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 0bd44ce4c872..fa555be00ded 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -20,7 +20,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" @@ -64,6 +66,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -231,9 +234,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { SDValue Oper = N->getOperand(i); + EVT OperVT = Oper.getValueType(); - if (Oper.getValueType().isVector()) - Oper = GetScalarizedVector(Oper); + if (OperVT.isVector()) { + if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector) + Oper = GetScalarizedVector(Oper); + else + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperVT.getVectorElementType(), Oper, + DAG.getVectorIdxConstant(0, dl)); + } Opers[i] = Oper; } @@ -582,6 +592,29 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { return DAG.getNode(ExtendCode, DL, NVT, Res); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + SDValue Arg = N->getOperand(0); + SDValue Test = N->getOperand(1); + EVT ArgVT = Arg.getValueType(); + EVT ResultVT = N->getValueType(0).getVectorElementType(); + + if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) { + Arg = GetScalarizedVector(Arg); + } else { + EVT VT = ArgVT.getVectorElementType(); + Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg, + DAG.getVectorIdxConstant(0, DL)); + } + + SDValue Res = + DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags()); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. 
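// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// "Boolean contents" in one scalar: vector lanes commonly encode true as
// all-ones (a sign-extended i1), while scalar booleans are usually zero or
// one (a zero-extended i1); getExtendForContent picks between the two.
#include <cassert>
#include <cstdint>

int main() {
  bool B = true;
  uint8_t ZeroOrOne = uint8_t(B);        // ZeroOrOne contents: 0x01
  uint8_t AllOnes = uint8_t(-int8_t(B)); // ZeroOrNegativeOne contents: 0xFF
  assert(ZeroOrOne == 0x01 && AllOnes == 0xFF);
}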
+ ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT)); + return DAG.getNode(ExtendCode, DL, ResultVT, Res); +} //===----------------------------------------------------------------------===// // Operand Vector Scalarization <1 x ty> -> ty. @@ -926,6 +959,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: @@ -949,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true); break; case ISD::SETCC: + case ISD::VP_SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; case ISD::VECTOR_REVERSE: @@ -988,13 +1023,17 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: - case ISD::FNEG: + case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FP_EXTEND: + case ISD::VP_FP_EXTEND: case ISD::FP_ROUND: + case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::VP_FPTOSI: case ISD::FP_TO_UINT: + case ISD::VP_FPTOUI: case ISD::FRINT: case ISD::FROUND: case ISD::FROUNDEVEN: @@ -1002,8 +1041,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: case ISD::FTRUNC: case ISD::SINT_TO_FP: + case ISD::VP_SITOFP: case ISD::TRUNCATE: + case ISD::VP_TRUNCATE: case ISD::UINT_TO_FP: + case ISD::VP_UITOFP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -1011,6 +1053,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: + case ISD::VP_SIGN_EXTEND: + case ISD::VP_ZERO_EXTEND: SplitVecRes_ExtendOp(N, Lo, Hi); break; @@ -1053,7 +1097,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::ROTR: SplitVecRes_BinOp(N, Lo, Hi); break; - case ISD::FMA: + case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: case ISD::FSHR: SplitVecRes_TernaryOp(N, Lo, Hi); @@ -1175,10 +1219,28 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, - Op2Lo, N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, - Op2Hi, N->getFlags()); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() == 3) { + Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags); + Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags); + return; + } + + assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl); + + Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), + {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), + {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags); } void 
DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { @@ -1398,6 +1460,19 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); } +void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + SDValue ArgLo, ArgHi; + SDValue Test = N->getOperand(1); + GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags()); + Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags()); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -2043,8 +2118,20 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { else std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + if (N->getOpcode() == ISD::SETCC) { + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + } else { + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo, + EVLLo); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi, + EVLHi); + } } void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, @@ -2056,22 +2143,37 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. - unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; - EVT InVT = N->getOperand(OpNo).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) - GetSplitVector(N->getOperand(OpNo), Lo, Hi); + GetSplitVector(N->getOperand(0), Lo, Hi); else - std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); - if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1), - N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1), - N->getFlags()); - } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); + const SDNodeFlags Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + if (N->getNumOperands() <= 2) { + if (Opcode == ISD::FP_ROUND) { + Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags); + } else { + Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags); + } + return; } + + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + + Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags); } void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, @@ -2107,14 +2209,34 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; N->dump(&DAG); dbgs() << "\n"); + if (!N->isVPOpcode()) { + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + // Extend the source vector by one step. SDValue NewSrc = - DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Get the low and high halves of the new, extended one step, vector. std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); // Extend those vector halves the rest of the way. - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo}); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi}); return; } } @@ -2126,108 +2248,352 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
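// ---- editor's sketch (illustrative, not part of the upstream diff) ----
// The setting for everything below: splitting a shuffle of two 2N-element
// operands yields two N-element results whose mask indexes into four
// N-element inputs (the lo/hi halves of each operand). Mask index I selects
// input I / NewElts, lane I % NewElts; a half that touches at most two
// inputs can stay a two-operand shuffle.
#include <cassert>
#include <vector>

int main() {
  const int NewElts = 2;
  // Halves of the original operands {0,1,2,3} and {10,11,12,13}:
  std::vector<std::vector<int>> Inputs = {{0, 1}, {2, 3}, {10, 11}, {12, 13}};
  std::vector<int> Mask = {3, 4, 0, 7}; // lanes of the 8-wide concatenation
  auto Lane = [&](int I) { return Inputs[I / NewElts][I % NewElts]; };
  std::vector<int> Lo = {Lane(Mask[0]), Lane(Mask[1])}; // touches inputs 1 and 2 only
  std::vector<int> Hi = {Lane(Mask[2]), Lane(Mask[3])}; // touches inputs 0 and 3 only
  assert((Lo == std::vector<int>{3, 10}) && (Hi == std::vector<int>{0, 13}));
}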
SDValue Inputs[4]; - SDLoc dl(N); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); + auto &&IsConstant = [](const SDValue &N) { + APInt SplatValue; + return N.getResNo() == 0 && + (ISD::isConstantSplatVector(N.getNode(), SplatValue) || + ISD::isBuildVectorOfConstantSDNodes(N.getNode())); + }; + auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1, + SDValue &Input2, + ArrayRef<int> Mask) { + assert(Input1->getOpcode() == ISD::BUILD_VECTOR && + Input2->getOpcode() == ISD::BUILD_VECTOR && + "Expected build vector node."); + EVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT)); + for (unsigned I = 0; I < NewElts; ++I) { + if (Mask[I] == UndefMaskElem) + continue; + unsigned Idx = Mask[I]; + if (Idx >= NewElts) + Ops[I] = Input2.getOperand(Idx - NewElts); + else + Ops[I] = Input1.getOperand(Idx); + // Make the type of all elements the same as the element type. + if (Ops[I].getValueType().bitsGT(EltVT)) + Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]); + } + return DAG.getBuildVector(NewVT, DL, Ops); + }; + // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SmallVector<int, 16> Ops; - for (unsigned High = 0; High < 2; ++High) { - SDValue &Output = High ? Hi : Lo; - - // Build a shuffle mask for the output, discovering on the fly which - // input vectors to use as shuffle operands (recorded in InputUsed). - // If building a suitable shuffle vector proves too hard, then bail - // out with useBuildVector set. - unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. - unsigned FirstMaskIdx = High * NewElts; - bool useBuildVector = false; - for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - // The mask element. This indexes into the input. - int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); - - // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; - - if (Input >= array_lengthof(Inputs)) { - // The mask element does not index into any input vector. - Ops.push_back(-1); + SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end()); + // Try to pack incoming shuffles/inputs. + auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts, + &DL](SmallVectorImpl<int> &Mask) { + // Check if all inputs are shuffles of the same operands or non-shuffles. + MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs; + for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) { + SDValue Input = Inputs[Idx]; + auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode()); + if (!Shuffle || + Input.getOperand(0).getValueType() != Input.getValueType()) + continue; + ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))] + .push_back(Idx); + ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))] + .push_back(Idx); + } + for (auto &P : ShufflesIdxs) { + if (P.second.size() < 2) continue; + // Use shuffles operands instead of shuffles themselves. + // 1. Adjust mask. 
+ for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (Inputs[SrcRegIdx].isUndef()) { + Idx = UndefMaskElem; + continue; + } + auto *Shuffle = + dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode()); + if (!Shuffle || !is_contained(P.second, SrcRegIdx)) + continue; + int MaskElt = Shuffle->getMaskElt(Idx % NewElts); + if (MaskElt == UndefMaskElem) { + Idx = UndefMaskElem; + continue; + } + Idx = MaskElt % NewElts + + P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first + ? 0 + : 1] * + NewElts; } - - // Turn the index into an offset from the start of the input vector. - Idx -= Input * NewElts; - - // Find or create a shuffle vector operand to hold this input. - unsigned OpNo; - for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { - if (InputUsed[OpNo] == Input) { - // This input vector is already an operand. - break; - } else if (InputUsed[OpNo] == -1U) { - // Create a new operand for this input vector. - InputUsed[OpNo] = Input; - break; + // 2. Update inputs. + Inputs[P.second[0]] = P.first.first; + Inputs[P.second[1]] = P.first.second; + // Clear the pair data. + P.second.clear(); + ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear(); + } + // Check if any concat_vectors can be simplified. + SmallBitVector UsedSubVector(2 * array_lengthof(Inputs)); + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (Inputs[SrcRegIdx].isUndef()) { + Idx = UndefMaskElem; + continue; + } + TargetLowering::LegalizeTypeAction TypeAction = + getTypeAction(Inputs[SrcRegIdx].getValueType()); + if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS && + Inputs[SrcRegIdx].getNumOperands() == 2 && + !Inputs[SrcRegIdx].getOperand(1).isUndef() && + (TypeAction == TargetLowering::TypeLegal || + TypeAction == TargetLowering::TypeWidenVector)) + UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2)); + } + if (UsedSubVector.count() > 1) { + SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs; + for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1)) + continue; + if (Pairs.empty() || Pairs.back().size() == 2) + Pairs.emplace_back(); + if (UsedSubVector.test(2 * I)) { + Pairs.back().emplace_back(I, 0); + } else { + assert(UsedSubVector.test(2 * I + 1) && + "Expected to be used one of the subvectors."); + Pairs.back().emplace_back(I, 1); } } - - if (OpNo >= array_lengthof(InputUsed)) { - // More than two input vectors used! Give up on trying to create a - // shuffle vector. Insert all elements into a BUILD_VECTOR instead. - useBuildVector = true; - break; + if (!Pairs.empty() && Pairs.front().size() > 1) { + // Adjust mask. + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + auto *It = find_if( + Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) { + return Idxs.front().first == SrcRegIdx || + Idxs.back().first == SrcRegIdx; + }); + if (It == Pairs.end()) + continue; + Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) + + (SrcRegIdx == It->front().first ? 0 : (NewElts / 2)); + } + // Adjust inputs. 
+ for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) { + Inputs[Idxs.front().first] = DAG.getNode( + ISD::CONCAT_VECTORS, DL, + Inputs[Idxs.front().first].getValueType(), + Inputs[Idxs.front().first].getOperand(Idxs.front().second), + Inputs[Idxs.back().first].getOperand(Idxs.back().second)); + } } - - // Add the mask index for the new shuffle vector. - Ops.push_back(Idx + OpNo * NewElts); } - - if (useBuildVector) { - EVT EltVT = NewVT.getVectorElementType(); - SmallVector<SDValue, 16> SVOps; - - // Extract the input elements by hand. - for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - // The mask element. This indexes into the input. - int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); - - // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; - - if (Input >= array_lengthof(Inputs)) { - // The mask element is "undef" or indexes off the end of the input. - SVOps.push_back(DAG.getUNDEF(EltVT)); + bool Changed; + do { + // Try to remove extra shuffles (except broadcasts) and shuffles with the + // reused operands. + Changed = false; + for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode()); + if (!Shuffle) continue; + if (Shuffle->getOperand(0).getValueType() != NewVT) + continue; + int Op = -1; + if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() && + !Shuffle->isSplat()) { + Op = 0; + } else if (!Inputs[I].hasOneUse() && + !Shuffle->getOperand(1).isUndef()) { + // Find the only used operand, if possible. + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (SrcRegIdx != I) + continue; + int MaskElt = Shuffle->getMaskElt(Idx % NewElts); + if (MaskElt == UndefMaskElem) { + Idx = UndefMaskElem; + continue; + } + int OpIdx = MaskElt / NewElts; + if (Op == -1) { + Op = OpIdx; + continue; + } + if (Op != OpIdx) { + Op = -1; + break; + } + } + } + if (Op < 0) { + // Try to check if one of the shuffle operands is used already. + for (int OpIdx = 0; OpIdx < 2; ++OpIdx) { + if (Shuffle->getOperand(OpIdx).isUndef()) + continue; + auto *It = find(Inputs, Shuffle->getOperand(OpIdx)); + if (It == std::end(Inputs)) + continue; + int FoundOp = std::distance(std::begin(Inputs), It); + // Found that operand is used already. + // 1. Fix the mask for the reused operand. + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (SrcRegIdx != I) + continue; + int MaskElt = Shuffle->getMaskElt(Idx % NewElts); + if (MaskElt == UndefMaskElem) { + Idx = UndefMaskElem; + continue; + } + int MaskIdx = MaskElt / NewElts; + if (OpIdx == MaskIdx) + Idx = MaskElt % NewElts + FoundOp * NewElts; + } + // 2. Set Op to the unused OpIdx. + Op = (OpIdx + 1) % 2; + break; + } + } + if (Op >= 0) { + Changed = true; + Inputs[I] = Shuffle->getOperand(Op); + // Adjust mask. + for (int &Idx : Mask) { + if (Idx == UndefMaskElem) + continue; + unsigned SrcRegIdx = Idx / NewElts; + if (SrcRegIdx != I) + continue; + int MaskElt = Shuffle->getMaskElt(Idx % NewElts); + int OpIdx = MaskElt / NewElts; + if (OpIdx != Op) + continue; + Idx = MaskElt % NewElts + SrcRegIdx * NewElts; + } } - - // Turn the index into an offset from the start of the input vector. - Idx -= Input * NewElts; - - // Extract the vector element by hand. 
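The "find the only used operand" scan above can be read in isolation: walk the outer mask, and for every element that reads inner-shuffle input I, note which inner operand it ultimately lands in; if all live elements agree, the other operand is dead. A sketch under the same undef-as-negative convention (not LLVM code):

#include <vector>

int findSingleUsedOperand(const std::vector<int> &OuterMask,
                          const std::vector<int> &InnerMask, int I,
                          int NewElts) {
  int Op = -1; // -1: nothing seen yet, or conflicting operands
  for (int Idx : OuterMask) {
    if (Idx < 0 || Idx / NewElts != I)
      continue;                      // undef, or reads another input
    int MaskElt = InnerMask[Idx % NewElts];
    if (MaskElt < 0)
      continue;                      // undef lane constrains nothing
    int OpIdx = MaskElt / NewElts;   // 0 = first operand, 1 = second
    if (Op == -1)
      Op = OpIdx;
    else if (Op != OpIdx)
      return -1;                     // both operands used; give up
  }
  return Op;
}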
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input],
- DAG.getVectorIdxConstant(Idx, dl)));
}
-
- // Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getBuildVector(NewVT, dl, SVOps);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = DAG.getUNDEF(NewVT);
- } else {
- SDValue Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = InputUsed[1] == -1U ?
- DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops);
+ } while (Changed);
+ };
+ TryPeekThroughShufflesInputs(OrigMask);
+ // Process unique inputs.
+ auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
+ NewElts](SmallVectorImpl<int> &Mask) {
+ SetVector<SDValue> UniqueInputs;
+ SetVector<SDValue> UniqueConstantInputs;
+ for (unsigned I = 0; I < array_lengthof(Inputs); ++I) {
+ if (IsConstant(Inputs[I]))
+ UniqueConstantInputs.insert(Inputs[I]);
+ else if (!Inputs[I].isUndef())
+ UniqueInputs.insert(Inputs[I]);
+ }
+ // Adjust mask in case of reused inputs. Also, constant inputs need to be
+ // inserted first, otherwise the final outcome is affected.
+ if (UniqueInputs.size() != array_lengthof(Inputs)) {
+ auto &&UniqueVec = UniqueInputs.takeVector();
+ auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
+ unsigned ConstNum = UniqueConstantVec.size();
+ for (int &Idx : Mask) {
+ if (Idx == UndefMaskElem)
+ continue;
+ unsigned SrcRegIdx = Idx / NewElts;
+ if (Inputs[SrcRegIdx].isUndef()) {
+ Idx = UndefMaskElem;
+ continue;
+ }
+ const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
+ if (It != UniqueConstantVec.end()) {
+ Idx = (Idx % NewElts) +
+ NewElts * std::distance(UniqueConstantVec.begin(), It);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ continue;
+ }
+ const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
+ assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
+ Idx = (Idx % NewElts) +
+ NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
+ assert(Idx >= 0 && "Expected defined mask idx.");
+ }
+ copy(UniqueConstantVec, std::begin(Inputs));
+ copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
}
+ };
+ MakeUniqueInputs(OrigMask);
+ SDValue OrigInputs[4];
+ copy(Inputs, std::begin(OrigInputs));
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
- Ops.clear();
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands.
+ unsigned FirstMaskIdx = High * NewElts;
+ SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem);
+ copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
+ assert(!Output && "Expected default initialized initial value.");
+ TryPeekThroughShufflesInputs(Mask);
+ MakeUniqueInputs(Mask);
+ SDValue TmpInputs[4];
+ copy(Inputs, std::begin(TmpInputs));
+ // Track changes in the output registers.
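MakeUniqueInputs boils down to: bucket the inputs into constants and non-constants, drop duplicates, place the constant bucket first, and renumber every mask element to point at its input's new slot. A simplified stand-alone model (ints stand in for SDValues; undef inputs are ignored here), not the LLVM implementation:

#include <algorithm>
#include <vector>

void makeUniqueInputs(std::vector<int> &Inputs, std::vector<int> &Mask,
                      const std::vector<bool> &IsConst, int NewElts) {
  std::vector<int> Consts, Others;
  for (std::size_t I = 0; I < Inputs.size(); ++I) {
    auto &Dst = IsConst[I] ? Consts : Others;
    if (std::find(Dst.begin(), Dst.end(), Inputs[I]) == Dst.end())
      Dst.push_back(Inputs[I]); // keep first occurrence only
  }
  for (int &Idx : Mask) {
    if (Idx < 0)
      continue; // undef
    int V = Inputs[Idx / NewElts];
    auto C = std::find(Consts.begin(), Consts.end(), V);
    int Reg;
    if (C != Consts.end())
      Reg = int(C - Consts.begin()); // constants occupy the first slots
    else
      Reg = int(Consts.size()) +
            int(std::find(Others.begin(), Others.end(), V) - Others.begin());
    Idx = Reg * NewElts + Idx % NewElts;
  }
  std::copy(Consts.begin(), Consts.end(), Inputs.begin());
  std::copy(Others.begin(), Others.end(), Inputs.begin() + Consts.size());
}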
+ int UsedIdx = -1; + bool SecondIteration = false; + auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) { + if (UsedIdx < 0) { + UsedIdx = Idx; + return false; + } + if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx) + SecondIteration = true; + return SecondIteration; + }; + processShuffleMasks( + Mask, array_lengthof(Inputs), array_lengthof(Inputs), + /*NumOfUsedRegs=*/1, + [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); }, + [&Output, &DAG = DAG, NewVT, &DL, &Inputs, + &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) { + if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR) + Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask); + else + Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx], + DAG.getUNDEF(NewVT), Mask); + Inputs[Idx] = Output; + }, + [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs, + &TmpInputs, + &BuildVector](ArrayRef<int> Mask, unsigned Idx1, unsigned Idx2) { + if (AccumulateResults(Idx1)) { + if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR && + Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR) + Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask); + else + Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1], + Inputs[Idx2], Mask); + } else { + if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR && + TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR) + Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask); + else + Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1], + TmpInputs[Idx2], Mask); + } + Inputs[Idx1] = Output; + }); + copy(OrigInputs, std::begin(Inputs)); } } @@ -2268,6 +2634,32 @@ void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue InLo, InHi; + GetSplitVector(N->getOperand(0), InLo, InHi); + SDLoc DL(N); + + Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi); + Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc DL(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Expanded = TLI.expandVectorSplice(N, DAG); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded, + DAG.getVectorIdxConstant(0, DL)); + Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded, + DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); +} + //===----------------------------------------------------------------------===// // Operand Vector Splitting //===----------------------------------------------------------------------===// @@ -2294,16 +2686,19 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { report_fatal_error("Do not know how to split this operator's " "operand!\n"); + case ISD::VP_SETCC: case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = SplitVecOp_TruncateHelper(N); break; case ISD::STRICT_FP_ROUND: + case ISD::VP_FP_ROUND: case 
ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: @@ -2543,6 +2938,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Ch); + } else if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi); } else { Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); @@ -3128,8 +3531,20 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt); EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2); - LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); - HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + if (N->getOpcode() == ISD::SETCC) { + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + } else { + assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); + LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1, + N->getOperand(2), MaskLo, EVLLo); + HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1, + N->getOperand(2), MaskHi, EVLHi); + } SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); EVT OpVT = N->getOperand(0).getValueType(); @@ -3160,6 +3575,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), Hi.getValue(1)); ReplaceValueWith(SDValue(N, 1), NewChain); + } else if (N->getOpcode() == ISD::VP_FP_ROUND) { + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL); + Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo); + Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi); } else { Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); @@ -3204,6 +3626,22 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { return; SDValue Res = SDValue(); + + auto unrollExpandedOp = [&]() { + // We're going to widen this vector op to a legal type by padding with undef + // elements. If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. 
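The comment above is the whole motivation for unrollExpandedOp: widening, say, v3f32 frem to v4f32 and then expanding would emit four fmodf libcalls, one of them on an undef lane, while unrolling the original op needs only three. A scalar model of the unrolled form (illustrative only; assumes equal-length inputs):

#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> unrolledFRem(const std::vector<float> &A,
                                const std::vector<float> &B) {
  std::vector<float> R(A.size());
  for (std::size_t I = 0; I < A.size(); ++I)
    R[I] = std::fmod(A[I], B[I]); // one scalar libcall per real lane only
  return R;
}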
+ EVT VT = N->getValueType(0); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + return true; + } + return false; + }; + switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -3223,6 +3661,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_ScalarOp(N); @@ -3235,6 +3674,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Select(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::VP_SETCC: case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: @@ -3280,6 +3720,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: + case ISD::AVGCEILS: + case ISD::AVGCEILU: // Vector-predicated binary op widening. Note that -- unlike the // unpredicated versions -- we don't have to worry about trapping on // operations like UDIV, FADD, etc., as we pass on the original vector @@ -3297,12 +3741,19 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOW: + case ISD::FREM: + if (unrollExpandedOp()) + break; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other binary ops. + LLVM_FALLTHROUGH; + case ISD::FADD: case ISD::FMUL: - case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: @@ -3338,6 +3789,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_FCOPYSIGN(N); break; + case ISD::IS_FPCLASS: + Res = WidenVecRes_IS_FPCLASS(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -3350,14 +3805,23 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::FP_EXTEND: + case ISD::VP_FP_EXTEND: case ISD::FP_ROUND: + case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::VP_FPTOSI: case ISD::FP_TO_UINT: + case ISD::VP_FPTOUI: case ISD::SIGN_EXTEND: + case ISD::VP_SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::VP_SITOFP: + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: case ISD::UINT_TO_FP: + case ISD::VP_UITOFP: case ISD::ZERO_EXTEND: + case ISD::VP_ZERO_EXTEND: Res = WidenVecRes_Convert(N); break; @@ -3381,23 +3845,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: - case ISD::FTRUNC: { - // We're going to widen this vector op to a legal type by padding with undef - // elements. If the wide vector op is eventually going to be expanded to - // scalar libcalls, then unroll into scalar ops now to avoid unnecessary - // libcalls on the undef elements. 
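The AVGFLOORS/AVGFLOORU/AVGCEILS/AVGCEILU opcodes added to the widening switch above compute fixed-width averages as if in a wider type, i.e. without losing the carry. The standard overflow-free identities for the unsigned forms, verified exhaustively for i8 in this self-contained check:

#include <cassert>
#include <cstdint>

uint8_t avg_floor_u(uint8_t a, uint8_t b) { return (a & b) + ((a ^ b) >> 1); }
uint8_t avg_ceil_u(uint8_t a, uint8_t b) { return (a | b) - ((a ^ b) >> 1); }

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      assert(avg_floor_u(a, b) == ((a + b) >> 1));     // AVGFLOORU
      assert(avg_ceil_u(a, b) == ((a + b + 1) >> 1));  // AVGCEILU
    }
}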
- EVT VT = N->getValueType(0); - EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && - TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { - Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + case ISD::FTRUNC: + if (unrollExpandedOp()) break; - } - } - // If the target has custom/legal support for the scalar FP intrinsic ops - // (they are probably not destined to become libcalls), then widen those like - // any other unary ops. - LLVM_FALLTHROUGH; + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those + // like any other unary ops. + LLVM_FALLTHROUGH; case ISD::ABS: case ISD::BITREVERSE: @@ -3407,13 +3861,13 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - case ISD::FNEG: + case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; - case ISD::FMA: + case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: case ISD::FSHR: Res = WidenVecRes_Ternary(N); @@ -3432,7 +3886,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); SDValue InOp3 = GetWidenedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); + if (N->getNumOperands() == 3) + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); + + assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); + return DAG.getNode(N->getOpcode(), dl, WidenVT, + {InOp1, InOp2, InOp3, Mask, N->getOperand(4)}); } SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { @@ -3552,7 +4015,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; - unsigned NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.getVectorMinNumElements(); const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; @@ -3566,6 +4029,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } + // FIXME: Improve support for scalable vectors. + assert(!VT.isScalableVector() && "Scalable vectors not handled yet."); + // No legal vector version so unroll the vector operation and then widen. 
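Several of the widened VP opcodes handled just above (VP_FMA, VP_FNEG, ...) carry a mask and an explicit vector length alongside the data operands, and the mask is widened together with them. A scalar model of VP_FMA lane semantics, with the caveat that in the real node inactive lanes are undefined rather than pass-through as sketched here:

#include <vector>

std::vector<float> vpFMA(std::vector<float> A, const std::vector<float> &B,
                         const std::vector<float> &C,
                         const std::vector<bool> &Mask, unsigned EVL) {
  for (unsigned I = 0; I < A.size(); ++I)
    if (I < EVL && Mask[I])
      A[I] = A[I] * B[I] + C[I]; // only active lanes compute
  return A;
}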
if (NumElts == 1) return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); @@ -3826,6 +4292,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); + if (N->getNumOperands() == 3) { + assert(N->isVPOpcode() && "Expected VP opcode"); + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); + } return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { @@ -4007,6 +4479,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } +SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Arg = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, + N->getFlags()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -4018,7 +4497,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); + if (N->getNumOperands() == 1) + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); + + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + assert(N->isVPOpcode() && "Expected VP opcode"); + + SDValue Mask = + GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, + {InOp, Mask, N->getOperand(2)}); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -4243,11 +4731,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - SDValue InOp = N->getOperand(0); - SDValue Idx = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); SDLoc dl(N); auto InOpTypeAction = getTypeAction(InOp.getValueType()); @@ -4264,6 +4752,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // Check if we can extract from the vector. 
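WidenVecRes_IS_FPCLASS above widens the llvm.is.fpclass-style node, which tests each lane against a class bitmask. A scalar sketch over three of the documented test bits (the bit values follow the llvm.is.fpclass intrinsic documentation; treat the subset as illustrative, not a full implementation):

#include <cmath>

enum : unsigned {
  fcNan = 0x3,      // signaling | quiet NaN
  fcNegInf = 0x4,
  fcPosInf = 0x200,
};

bool isFPClass(double X, unsigned Test) {
  if ((Test & fcNan) && std::isnan(X)) return true;
  if ((Test & fcNegInf) && std::isinf(X) && std::signbit(X)) return true;
  if ((Test & fcPosInf) && std::isinf(X) && !std::signbit(X)) return true;
  return false; // remaining class bits omitted in this sketch
}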
unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); unsigned InNumElts = InVT.getVectorMinNumElements(); + unsigned VTNumElts = VT.getVectorMinNumElements(); + assert(IdxVal % VTNumElts == 0 && + "Expected Idx to be a multiple of subvector minimum vector length"); if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); @@ -4277,8 +4768,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // nxv2i64 extract_subvector(nxv16i64, 8) // nxv2i64 extract_subvector(nxv16i64, 10) // undef) - unsigned VTNElts = VT.getVectorMinNumElements(); - unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts); + unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts); assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " "down type's element count"); EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, @@ -4287,7 +4777,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) { SmallVector<SDValue> Parts; unsigned I = 0; - for (; I < VTNElts / GCD; ++I) + for (; I < VTNumElts / GCD; ++I) Parts.push_back( DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, DAG.getVectorIdxConstant(IdxVal + I * GCD, dl))); @@ -4304,9 +4794,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. SmallVector<SDValue, 16> Ops(WidenNumElts); - unsigned NumElts = VT.getVectorNumElements(); unsigned i; - for (i = 0; i < NumElts; ++i) + for (i = 0; i < VTNumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getVectorIdxConstant(IdxVal + i, dl)); @@ -4783,10 +5272,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE - ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, - N->getOperand(3)) - : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); + if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) + return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, + N->getOperand(3)); + return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { @@ -4832,13 +5321,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { N->getOperand(0).getValueType().isVector() && "Operands must be vectors"); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); SDValue InOp1 = N->getOperand(0); EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non-vector type"); - EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = + EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC); // The input and output types often differ here, and it could be that while // we'd prefer to widen the result type, the input operands have been split. 
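The GCD decomposition above (together with the VTNElts to VTNumElts cleanup) breaks an otherwise illegal EXTRACT_SUBVECTOR into chunks whose size divides both element counts, then reassembles them. A fixed-width sketch with -1 standing in for undef padding, assumptions as commented:

#include <cassert>
#include <numeric>
#include <vector>

std::vector<int> extractViaGCD(const std::vector<int> &In, unsigned Idx,
                               unsigned NumElts, unsigned WidenNumElts) {
  assert(Idx + NumElts <= In.size() && "extract must stay in bounds");
  unsigned G = std::gcd(NumElts, WidenNumElts);
  std::vector<int> Out;
  for (unsigned I = 0; I < NumElts / G; ++I)
    for (unsigned J = 0; J < G; ++J)
      Out.push_back(In[Idx + I * G + J]); // one PartVT-sized chunk
  Out.resize(WidenNumElts, -1);           // pad result up to the widened type
  return Out;
}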
@@ -4865,8 +5354,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); (void)WidenInVT; - return DAG.getNode(ISD::SETCC, SDLoc(N), - WidenVT, InOp1, InOp2, N->getOperand(2)); + if (N->getOpcode() == ISD::VP_SETCC) { + SDValue Mask = + GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); + return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2, + N->getOperand(2), Mask, N->getOperand(4)); + } + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, + N->getOperand(2)); } SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { @@ -4946,6 +5441,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; + case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -5098,6 +5594,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { return DAG.UnrollVectorOp(N); } +SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) { + SDLoc DL(N); + EVT ResultVT = N->getValueType(0); + SDValue Test = N->getOperand(1); + SDValue WideArg = GetWidenedVector(N->getOperand(0)); + + // Process this node similarly to SETCC. + EVT WideResultVT = getSetCCResultType(WideArg.getValueType()); + if (ResultVT.getScalarType() == MVT::i1) + WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideResultVT.getVectorNumElements()); + + SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT, + {WideArg, Test}, N->getFlags()); + + // Extract the needed results from the result vector. + EVT ResVT = + EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(), + ResultVT.getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode, + DAG.getVectorIdxConstant(0, DL)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, ResultVT, CC); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal. EVT VT = N->getValueType(0); @@ -5192,11 +5716,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { SDLoc dl(N); // Check if we can convert between two legal vector types and extract. - unsigned InWidenSize = InWidenVT.getSizeInBits(); - unsigned Size = VT.getSizeInBits(); + TypeSize InWidenSize = InWidenVT.getSizeInBits(); + TypeSize Size = VT.getSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. - if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { - unsigned NewNumElts = InWidenSize / Size; + if (!VT.isVector() && VT != MVT::x86mmx && + InWidenSize.hasKnownScalarFactor(Size)) { + unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); @@ -5211,9 +5736,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { // having to copy via memory. 
if (VT.isVector()) { EVT EltVT = VT.getVectorElementType(); - unsigned EltSize = EltVT.getSizeInBits(); - if (InWidenSize % EltSize == 0) { - unsigned NewNumElts = InWidenSize / EltSize; + unsigned EltSize = EltVT.getFixedSizeInBits(); + if (InWidenSize.isKnownMultipleOf(EltSize)) { + ElementCount NewNumElts = + (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits()) + .divideCoefficientBy(EltSize); EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); @@ -5266,18 +5793,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); SDValue SubVec = N->getOperand(1); SDValue InVec = N->getOperand(0); - if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector) - InVec = GetWidenedVector(InVec); - if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) SubVec = GetWidenedVector(SubVec); - if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() && + if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() && N->getConstantOperandVal(2) == 0) - return SubVec; + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec, + N->getOperand(2)); report_fatal_error("Don't know how to widen the operands for " "INSERT_SUBVECTOR"); @@ -5500,11 +6026,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { Mask = GetWidenedMask(Mask, WideEC); WideMemVT = EVT::getVectorVT(*DAG.getContext(), VPSC->getMemoryVT().getScalarType(), WideEC); - } else if (OpNo == 4) { + } else if (OpNo == 3) { // Just widen the index. It's allowed to have extra elements. Index = GetWidenedVector(Index); } else - llvm_unreachable("Can't widen this operand of mscatter"); + llvm_unreachable("Can't widen this operand of VP_SCATTER"); SDValue Ops[] = { VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask, @@ -5597,8 +6123,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { assert(NeutralElem && "Neutral element must exist"); // Pad the vector with the neutral element. - unsigned OrigElts = OrigVT.getVectorNumElements(); - unsigned WideElts = WideVT.getVectorNumElements(); + unsigned OrigElts = OrigVT.getVectorMinNumElements(); + unsigned WideElts = WideVT.getVectorMinNumElements(); + + if (WideVT.isScalableVector()) { + unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, + ElementCount::getScalable(GCD)); + SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); + for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD) + Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral, + DAG.getVectorIdxConstant(Idx, dl)); + return DAG.getNode(Opc, dl, N->getValueType(0), Op, Flags); + } + for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, DAG.getVectorIdxConstant(Idx, dl)); @@ -5622,8 +6160,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); // Pad the vector with the neutral element. 
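Padding with the neutral element, as the comment above says, is what keeps a widened reduction honest: the extra lanes must not change the result (0 for add, 1 for mul, all-ones for and, and so on). A quick self-contained check for add:

#include <cassert>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> V = {3, 5, 7};
  std::vector<int> Wide = V;
  Wide.resize(8, 0); // pad with the neutral element of '+'
  assert(std::accumulate(V.begin(), V.end(), 0) ==
         std::accumulate(Wide.begin(), Wide.end(), 0));
}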
- unsigned OrigElts = OrigVT.getVectorNumElements(); - unsigned WideElts = WideVT.getVectorNumElements(); + unsigned OrigElts = OrigVT.getVectorMinNumElements(); + unsigned WideElts = WideVT.getVectorMinNumElements(); + + if (WideVT.isScalableVector()) { + unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, + ElementCount::getScalable(GCD)); + SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); + for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD) + Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral, + DAG.getVectorIdxConstant(Idx, dl)); + return DAG.getNode(Opc, dl, N->getValueType(0), AccOp, Op, Flags); + } + for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, DAG.getVectorIdxConstant(Idx, dl)); @@ -5795,7 +6345,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // Allow wider loads if they are sufficiently aligned to avoid memory faults // and if the original load is simple. unsigned LdAlign = - (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment(); + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); // Find the vector type that can load from. Optional<EVT> FirstVT = @@ -6103,7 +6653,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); - assert(!InVT.isScalableVector() && !NVT.isScalableVector() && + assert(InVT.isScalableVector() == NVT.isScalableVector() && "cannot modify scalable vectors in this way"); SDLoc dl(InOp); @@ -6111,10 +6661,10 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, if (InVT == NVT) return InOp; - unsigned InNumElts = InVT.getVectorNumElements(); - unsigned WidenNumElts = NVT.getVectorNumElements(); - if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { - unsigned NumConcat = WidenNumElts / InNumElts; + ElementCount InEC = InVT.getVectorElementCount(); + ElementCount WidenEC = NVT.getVectorElementCount(); + if (WidenEC.hasKnownScalarFactor(InEC)) { + unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC); SmallVector<SDValue, 16> Ops(NumConcat); SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getUNDEF(InVT); @@ -6125,10 +6675,16 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } - if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + if (InEC.hasKnownScalarFactor(WidenEC)) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, DAG.getVectorIdxConstant(0, dl)); + assert(!InVT.isScalableVector() && !NVT.isScalableVector() && + "Scalable vectors should have been handled already."); + + unsigned InNumElts = InEC.getFixedValue(); + unsigned WidenNumElts = WidenEC.getFixedValue(); + // Fall back to extract and build. 
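The SplitVecRes_VECTOR_REVERSE and SplitVecRes_VECTOR_SPLICE bodies deleted just below were moved earlier in the file rather than dropped. The identity the REVERSE version relies on, checked concretely: reverse(concat(Lo, Hi)) == concat(reverse(Hi), reverse(Lo)), so the split halves swap places and each is reversed.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Lo = {0, 1, 2, 3}, Hi = {4, 5, 6, 7};

  std::vector<int> Whole(Lo);
  Whole.insert(Whole.end(), Hi.begin(), Hi.end());
  std::reverse(Whole.begin(), Whole.end()); // reverse of the full vector

  std::vector<int> NewLo(Hi.rbegin(), Hi.rend()); // reverse(Hi)
  std::vector<int> NewHi(Lo.rbegin(), Lo.rend()); // reverse(Lo)
  std::vector<int> Split(NewLo);
  Split.insert(Split.end(), NewHi.begin(), NewHi.end());

  assert(Whole == Split);
}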
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = NVT.getVectorElementType(); @@ -6144,29 +6700,3 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, Ops[Idx] = FillVal; return DAG.getBuildVector(NVT, dl, Ops); } - -void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue InLo, InHi; - GetSplitVector(N->getOperand(0), InLo, InHi); - SDLoc DL(N); - - Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi); - Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo); -} - -void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT VT = N->getValueType(0); - SDLoc DL(N); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Expanded = TLI.expandVectorSplice(N, DAG); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Expanded, - DAG.getVectorIdxConstant(0, DL)); - Hi = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Expanded, - DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); -} diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 3d5c4c5b1cae..e0e8d503ca92 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -28,21 +27,18 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "scheduler" -static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, - cl::ZeroOrMore, cl::init(false), - cl::desc("Disable use of DFA during scheduling")); +static cl::opt<bool> + DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::desc("Disable use of DFA during scheduling")); static cl::opt<int> RegPressureThreshold( - "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), - cl::desc("Track reg pressure and switch priority to in-depth")); + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index f64b332a7fef..9fcf692babdc 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -14,6 +14,7 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/DataTypes.h" #include <utility> diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 1a6be0cc2091..78fc407e9573 100644 --- 
a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,16 +11,14 @@ //===----------------------------------------------------------------------===// #include "InstrEmitter.h" -#include "ScheduleDAGSDNodes.h" #include "SDNodeDbgValue.h" -#include "llvm/ADT/STLExtras.h" +#include "ScheduleDAGSDNodes.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -442,17 +440,29 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector<SUnit*> &LiveRegDefs, + std::vector<SUnit *> &LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const SDNode *Node = nullptr) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { - if (RegAdded.insert(*AI).second) { - LRegs.push_back(*AI); - Added = true; - } + // Check if Ref is live. + if (!LiveRegDefs[*AI]) + continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[*AI] == SU) + continue; + + // Allow multiple uses of same def + if (Node && LiveRegDefs[*AI]->getNode() == Node) + continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(*AI).second) { + LRegs.push_back(*AI); + Added = true; } } return Added; @@ -504,6 +514,15 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, } continue; } + + if (Node->getOpcode() == ISD::CopyToReg) { + Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (Reg.isPhysical()) { + SDNode *SrcNode = Node->getOperand(2).getNode(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode); + } + } + if (!Node->isMachineOpcode()) continue; const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 7a5e8ac6075e..8a04ce7535a1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1294,11 +1294,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. -static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - SUnit **LiveRegDefs, +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const SDNode *Node = nullptr) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { // Check if Ref is live. @@ -1307,6 +1307,10 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, // Allow multiple uses of the same def. 
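The scheduler changes in this and the following hunk relax CheckForLiveRegDef the same way: a live physical-register def no longer counts as interference when it comes from the same SUnit, or (new here) from the same source node that feeds the CopyToReg being examined. A toy restatement of the predicate, with ints standing in for the real types:

#include <vector>

struct Def { int SU; int Node; };

bool clobbersLiveReg(const std::vector<const Def *> &LiveRegDefs, int Reg,
                     int SU, int Node) {
  const Def *D = LiveRegDefs[Reg];
  if (!D) return false;              // register not live
  if (D->SU == SU) return false;     // multiple uses of the same def
  if (D->Node == Node) return false; // same node feeding the CopyToReg
  return true;                       // genuine interference
}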
if (LiveRegDefs[*AliasI] == SU) continue; + // Allow multiple uses of same def + if (Node && LiveRegDefs[*AliasI]->getNode() == Node) + continue; + // Add Reg to the set of interfering live regs. if (RegAdded.insert(*AliasI).second) { LRegs.push_back(*AliasI); @@ -1387,6 +1391,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { continue; } + if (Node->getOpcode() == ISD::CopyToReg) { + Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (Reg.isPhysical()) { + SDNode *SrcNode = Node->getOperand(2).getNode(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI, + SrcNode); + } + } + if (!Node->isMachineOpcode()) continue; // If we're in the middle of scheduling a call, don't begin scheduling diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 92897aca7f6b..2a10157b404e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -884,7 +884,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (MI->isCandidateForCallSiteEntry() && DAG->getTarget().Options.EmitCallSiteInfo) - MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); + MF.addCallArgsForwardingRegs(MI, DAG->getCallSiteInfo(Node)); if (DAG->getNoMergeSiteInfo(Node)) { MI->setFlag(MachineInstr::MIFlag::NoMerge); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 10940478010e..1ba1fd65b8c9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -19,19 +19,15 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include <climits> using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 40d861702e86..b3b8756ae9ba 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,9 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -55,7 +53,6 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -144,11 +141,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { 
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - SplatVal = Op0->getAPIntValue().truncOrSelf(EltSize); + SplatVal = Op0->getAPIntValue().trunc(EltSize); return true; } if (auto *Op0 = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) { - SplatVal = Op0->getValueAPF().bitcastToAPInt().truncOrSelf(EltSize); + SplatVal = Op0->getValueAPF().bitcastToAPInt().trunc(EltSize); return true; } } @@ -714,6 +711,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + ID.AddInteger(LD->getMemOperand()->getFlags()); break; } case ISD::STORE: { @@ -721,6 +719,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); break; } case ISD::VP_LOAD: { @@ -728,6 +727,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ELD->getMemoryVT().getRawBits()); ID.AddInteger(ELD->getRawSubclassData()); ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); + ID.AddInteger(ELD->getMemOperand()->getFlags()); break; } case ISD::VP_STORE: { @@ -735,6 +735,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(EST->getMemoryVT().getRawBits()); ID.AddInteger(EST->getRawSubclassData()); ID.AddInteger(EST->getPointerInfo().getAddrSpace()); + ID.AddInteger(EST->getMemOperand()->getFlags()); + break; + } + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: { + const VPStridedLoadSDNode *SLD = cast<VPStridedLoadSDNode>(N); + ID.AddInteger(SLD->getMemoryVT().getRawBits()); + ID.AddInteger(SLD->getRawSubclassData()); + ID.AddInteger(SLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: { + const VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); + ID.AddInteger(SST->getMemoryVT().getRawBits()); + ID.AddInteger(SST->getRawSubclassData()); + ID.AddInteger(SST->getPointerInfo().getAddrSpace()); break; } case ISD::VP_GATHER: { @@ -742,6 +757,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(EG->getMemoryVT().getRawBits()); ID.AddInteger(EG->getRawSubclassData()); ID.AddInteger(EG->getPointerInfo().getAddrSpace()); + ID.AddInteger(EG->getMemOperand()->getFlags()); break; } case ISD::VP_SCATTER: { @@ -749,6 +765,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ES->getMemoryVT().getRawBits()); ID.AddInteger(ES->getRawSubclassData()); ID.AddInteger(ES->getPointerInfo().getAddrSpace()); + ID.AddInteger(ES->getMemOperand()->getFlags()); break; } case ISD::MLOAD: { @@ -756,6 +773,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MLD->getMemoryVT().getRawBits()); ID.AddInteger(MLD->getRawSubclassData()); ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + ID.AddInteger(MLD->getMemOperand()->getFlags()); break; } case ISD::MSTORE: { @@ -763,6 +781,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MST->getMemoryVT().getRawBits()); ID.AddInteger(MST->getRawSubclassData()); ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + ID.AddInteger(MST->getMemOperand()->getFlags()); break; } case ISD::MGATHER: { @@ -770,6 +789,7 @@ static void 
AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MG->getMemoryVT().getRawBits()); ID.AddInteger(MG->getRawSubclassData()); ID.AddInteger(MG->getPointerInfo().getAddrSpace()); + ID.AddInteger(MG->getMemOperand()->getFlags()); break; } case ISD::MSCATTER: { @@ -777,6 +797,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(MS->getMemoryVT().getRawBits()); ID.AddInteger(MS->getRawSubclassData()); ID.AddInteger(MS->getPointerInfo().getAddrSpace()); + ID.AddInteger(MS->getMemOperand()->getFlags()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -799,11 +820,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + ID.AddInteger(AT->getMemOperand()->getFlags()); break; } case ISD::PREFETCH: { const MemSDNode *PF = cast<MemSDNode>(N); ID.AddInteger(PF->getPointerInfo().getAddrSpace()); + ID.AddInteger(PF->getMemOperand()->getFlags()); break; } case ISD::VECTOR_SHUFFLE: { @@ -821,11 +844,18 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(BA->getTargetFlags()); break; } + case ISD::AssertAlign: + ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value()); + break; } // end switch (N->getOpcode()) - // Target specific memory nodes could also have address spaces to check. - if (N->isTargetMemoryOpcode()) - ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); + // Target specific memory nodes could also have address spaces and flags + // to check. + if (N->isTargetMemoryOpcode()) { + const MemSDNode *MN = cast<MemSDNode>(N); + ID.AddInteger(MN->getPointerInfo().getAddrSpace()); + ID.AddInteger(MN->getMemOperand()->getFlags()); + } } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -1395,6 +1425,12 @@ SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, TrueValue); } +SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val, + SDValue Mask, SDValue EVL, EVT VT) { + SDValue TrueValue = getBoolConstant(true, DL, VT, VT); + return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL); +} + SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT) { if (!V) @@ -2433,23 +2469,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { if (VT.isScalableVector()) return SDValue(); - APInt DemandedElts = VT.isVector() - ? APInt::getAllOnes(VT.getVectorNumElements()) - : APInt(1, 1); - return GetDemandedBits(V, DemandedBits, DemandedElts); -} - -/// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by DemandedBits are used in the elements specified by -/// DemandedElts. -/// TODO: really we should be making this into the DAG equivalent of -/// SimplifyMultipleUseDemandedBits and not generate any new nodes. 
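The ID.AddInteger(...getMemOperand()->getFlags()) lines added throughout AddNodeIDCustom above all serve one purpose: two memory nodes that differ only in their memory-operand flags (say, one volatile and one not) must not fold to the same CSE entry. A toy key making the same point, not the LLVM data structure:

#include <cstdint>

struct LoadKey {
  uint64_t TypeBits, Subclass, AddrSpace, MemFlags;
  bool operator==(const LoadKey &O) const {
    return TypeBits == O.TypeBits && Subclass == O.Subclass &&
           AddrSpace == O.AddrSpace &&
           MemFlags == O.MemFlags; // without this field, a volatile and a
                                   // non-volatile load would compare equal
  }
};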
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, - const APInt &DemandedElts) { switch (V.getOpcode()) { default: - return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, - *this); + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this); case ISD::Constant: { const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); APInt NewVal = CVal & DemandedBits; @@ -2469,8 +2491,8 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, if (Amt >= DemandedBits.getBitWidth()) break; APInt SrcDemandedBits = DemandedBits << Amt; - if (SDValue SimplifyLHS = - GetDemandedBits(V.getOperand(0), SrcDemandedBits)) + if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits( + V.getOperand(0), SrcDemandedBits, *this)) return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } @@ -2503,6 +2525,14 @@ bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); } +/// MaskedVectorIsZero - Return true if 'Op' is known to be zero in +/// DemandedElts. We use this predicate to simplify operations downstream. +bool SelectionDAG::MaskedVectorIsZero(SDValue V, const APInt &DemandedElts, + unsigned Depth /* = 0 */) const { + APInt Mask = APInt::getAllOnes(V.getScalarValueSizeInBits()); + return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); +} + /// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, unsigned Depth) const { @@ -2587,9 +2617,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return true; } case ISD::VECTOR_SHUFFLE: { - // Check if this is a shuffle node doing a splat. - // TODO: Do we need to handle shuffle(splat, undef, mask)? - int SplatIndex = -1; + // Check if this is a shuffle node doing a splat or a shuffle of a splat. + APInt DemandedLHS = APInt::getNullValue(NumElts); + APInt DemandedRHS = APInt::getNullValue(NumElts); ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); for (int i = 0; i != (int)NumElts; ++i) { int M = Mask[i]; @@ -2599,11 +2629,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, } if (!DemandedElts[i]) continue; - if (0 <= SplatIndex && SplatIndex != M) - return false; - SplatIndex = M; + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); } - return true; + + // If we aren't demanding either op, assume there's no splat. + // If we are demanding both ops, assume there's no splat. + if ((DemandedLHS.isZero() && DemandedRHS.isZero()) || + (!DemandedLHS.isZero() && !DemandedRHS.isZero())) + return false; + + // See if the demanded elts of the source op is a splat or we only demand + // one element, which should always be a splat. + // TODO: Handle source ops splats with undefs. + auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) { + APInt SrcUndefs; + return (SrcElts.countPopulation() == 1) || + (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) && + (SrcElts & SrcUndefs).isZero()); + }; + if (!DemandedLHS.isZero()) + return CheckSplatSrc(V.getOperand(0), DemandedLHS); + return CheckSplatSrc(V.getOperand(1), DemandedRHS); } case ISD::EXTRACT_SUBVECTOR: { // Offset the demanded elts by the subvector index. 
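The rewritten VECTOR_SHUFFLE case of isSplatValue above no longer demands a single splat index; it asks which operand the demanded lanes actually read and gives up if both (or neither) are read. A fixed-width sketch (NumElts <= 8; simplified so that one demanded source element counts as a splat, without the recursive isSplatValue step of the real code):

#include <bitset>
#include <vector>

bool isSplatThroughShuffle(const std::vector<int> &Mask,
                           std::bitset<8> Demanded, unsigned NumElts) {
  std::bitset<8> DemandedLHS, DemandedRHS;
  for (unsigned I = 0; I < NumElts; ++I) {
    int M = Mask[I];
    if (M < 0 || !Demanded[I])
      continue; // undef lane or lane not demanded
    if ((unsigned)M < NumElts)
      DemandedLHS.set(M);
    else
      DemandedRHS.set(M - NumElts);
  }
  if (DemandedLHS.any() == DemandedRHS.any())
    return false; // both or neither operand demanded: assume no splat
  return (DemandedLHS.any() ? DemandedLHS : DemandedRHS).count() == 1;
}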
@@ -2614,7 +2663,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, uint64_t Idx = V.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { UndefElts = UndefSrcElts.extractBits(NumElts, Idx); return true; @@ -2631,9 +2680,49 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return false; unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); APInt UndefSrcElts; - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts, Depth + 1)) { - UndefElts = UndefSrcElts.truncOrSelf(NumElts); + UndefElts = UndefSrcElts.trunc(NumElts); + return true; + } + break; + } + case ISD::BITCAST: { + SDValue Src = V.getOperand(0); + EVT SrcVT = Src.getValueType(); + unsigned SrcBitWidth = SrcVT.getScalarSizeInBits(); + unsigned BitWidth = VT.getScalarSizeInBits(); + + // Ignore bitcasts from unsupported types. + // TODO: Add fp support? + if (!SrcVT.isVector() || !SrcVT.isInteger() || !VT.isInteger()) + break; + + // Bitcast 'small element' vector to 'large element' vector. + if ((BitWidth % SrcBitWidth) == 0) { + // See if each sub element is a splat. + unsigned Scale = BitWidth / SrcBitWidth; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt ScaledDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); + for (unsigned I = 0; I != Scale; ++I) { + APInt SubUndefElts; + APInt SubDemandedElt = APInt::getOneBitSet(Scale, I); + APInt SubDemandedElts = APInt::getSplat(NumSrcElts, SubDemandedElt); + SubDemandedElts &= ScaledDemandedElts; + if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) + return false; + + // Here we can't do "MatchAnyBits" operation merge for undef bits. + // Because some operation only use part value of the source. + // Take llvm.fshl.* for example: + // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32 + // t2: v2i64 = bitcast t1 + // t5: v2i64 = fshl t3, t4, t2 + // We can not convert t2 to {i64 undef, i64 undef} + UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, + /*MatchAllBits=*/true); + } return true; } break; @@ -2978,7 +3067,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1); break; } @@ -3083,9 +3172,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); // TODO: SelfMultiply can be poison, but not undef. - SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( - Op.getOperand(0), DemandedElts, false, Depth + 1); + if (SelfMultiply) + SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( + Op.getOperand(0), DemandedElts, false, Depth + 1); Known = KnownBits::mul(Known, Known2, SelfMultiply); + + // If the multiplication is known not to overflow, the product of a number + // with itself is non-negative. 
Only do this if we didn't already compute + the opposite value for the sign bit. + if (Op->getFlags().hasNoSignedWrap() && + Op.getOperand(0) == Op.getOperand(1) && + !Known.isNegative()) + Known.makeNonNegative(); break; } case ISD::MULHU: { @@ -3128,6 +3226,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::udiv(Known, Known2); break; } + case ISD::AVGCEILU: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = Known.zext(BitWidth + 1); + Known2 = Known2.zext(BitWidth + 1); + KnownBits One = KnownBits::makeConstant(APInt(1, 1)); + Known = KnownBits::computeForAddCarry(Known, Known2, One); + Known = Known.extractBits(BitWidth, 1); + break; + } case ISD::SELECT: case ISD::VSELECT: Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); @@ -3330,7 +3438,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); Known = Known.zext(BitWidth); break; @@ -3342,7 +3450,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::SIGN_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. @@ -3358,7 +3466,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::ANY_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); Known = Known.anyext(BitWidth); break; @@ -3605,6 +3713,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::smax(Known, Known2); else Known = KnownBits::smin(Known, Known2); + + // For SMAX, if CstLow is non-negative we know the result will be + // non-negative and thus all sign bits are 0. + // TODO: There's an equivalent of this for smin with negative constant for + // known ones.
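The AVGCEILU case added above evaluates ceil((A + B) / 2) in BitWidth + 1 bits so the carry out of A + B + 1 is never lost, then extracts bits [BitWidth:1], i.e. the logical shift right by one. The same widening trick in ordinary integers (avgceilu8 is an illustrative name):

#include <cassert>
#include <cstdint>

// Ceiling average of two 8-bit values, computed exactly in 16 bits.
static uint8_t avgceilu8(uint8_t A, uint8_t B) {
  return (uint8_t)(((uint16_t)A + (uint16_t)B + 1) >> 1);
}

int main() {
  assert(avgceilu8(255, 255) == 255); // would wrap if done in 8 bits
  assert(avgceilu8(1, 2) == 2);       // rounds up, unlike AVGFLOORU
}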
+ if (IsMax && CstLow) { + const APInt &ValueLow = CstLow->getAPIntValue(); + if (ValueLow.isNonNegative()) { + unsigned SignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Known.Zero.setHighBits(std::min(SignBits, ValueLow.getNumSignBits())); + } + } + break; } case ISD::FP_TO_UINT_SAT: { @@ -3905,7 +4026,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SIGN_EXTEND_VECTOR_INREG: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements()); + APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } @@ -4192,7 +4313,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { @@ -4585,26 +4706,54 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +// Only bits set in Mask must be negated, other bits may be arbitrary. +SDValue llvm::getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs) { + if (isBitwiseNot(V, AllowUndefs)) + return V.getOperand(0); + + // Handle any_extend (not (truncate X)) pattern, where Mask only sets + // bits in the non-extended part. + ConstantSDNode *MaskC = isConstOrConstSplat(Mask); + if (!MaskC || V.getOpcode() != ISD::ANY_EXTEND) + return SDValue(); + SDValue ExtArg = V.getOperand(0); + if (ExtArg.getScalarValueSizeInBits() >= + MaskC->getAPIntValue().getActiveBits() && + isBitwiseNot(ExtArg, AllowUndefs) && + ExtArg.getOperand(0).getOpcode() == ISD::TRUNCATE && + ExtArg.getOperand(0).getOperand(0).getValueType() == V.getValueType()) + return ExtArg.getOperand(0).getOperand(0); + return SDValue(); +} + +static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) { + // Match masked merge pattern (X & ~M) op (Y & M) + // Including degenerate case (X & ~M) op M + auto MatchNoCommonBitsPattern = [&](SDValue Not, SDValue Mask, + SDValue Other) { + if (SDValue NotOperand = + getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) { + if (Other == NotOperand) + return true; + if (Other->getOpcode() == ISD::AND) + return NotOperand == Other->getOperand(0) || + NotOperand == Other->getOperand(1); + } + return false; + }; + if (A->getOpcode() == ISD::AND) + return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) || + MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B); + return false; +} + // FIXME: unify with llvm::haveNoCommonBitsSet. 
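getBitwiseNotOperand and haveNoCommonBitsSetCommutative above recognize the masked-merge shape (X & ~M) op (Y & M), including the degenerate (X & ~M) op M: whatever X, Y and M are, the two sides select from disjoint bit positions, which is exactly the property haveNoCommonBitsSet needs (and what lets, e.g., an OR of the two halves be treated as an ADD). A tiny standalone check of the underlying identity:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF, Y = 0x12345678, M = 0x00FF00FF;
  uint32_t A = X & ~M, B = Y & M;
  assert((A & B) == 0);     // the two sides never share a set bit
  assert((A | B) == A + B); // so OR and ADD coincide on them
}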
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - // Match masked merge pattern (X & ~M) op (Y & M) - if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) { - auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) { - if (isBitwiseNot(NotM, true)) { - SDValue NotOperand = NotM->getOperand(0); - return NotOperand == And->getOperand(0) || - NotOperand == And->getOperand(1); - } - return false; - }; - if (MatchNoCommonBitsPattern(A->getOperand(0), B) || - MatchNoCommonBitsPattern(A->getOperand(1), B) || - MatchNoCommonBitsPattern(B->getOperand(0), A) || - MatchNoCommonBitsPattern(B->getOperand(1), A)) - return true; - } + if (haveNoCommonBitsSetCommutative(A, B) || + haveNoCommonBitsSetCommutative(B, A)) + return true; return KnownBits::haveNoCommonBitsSet(computeKnownBits(A), computeKnownBits(B)); } @@ -4833,9 +4982,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); - case ISD::FP16_TO_FP: { + case ISD::FP16_TO_FP: + case ISD::BF16_TO_FP: { bool Ignored; - APFloat FPV(APFloat::IEEEhalf(), + APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() + : APFloat::BFloat(), (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); // This can return overflow, underflow, or inexact; we don't care. @@ -4909,11 +5060,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: { bool Ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(APFloat::IEEEhalf(), + (void)V.convert(Opcode == ISD::FP_TO_FP16 ? 
APFloat::IEEEhalf() + : APFloat::BFloat(), APFloat::rmNearestTiesToEven, &Ignored); return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); } @@ -4965,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == Operand.getValueType() && "Unexpected VT!"); + if (isGuaranteedNotToBeUndefOrPoison(Operand)) + return Operand; break; case ISD::TokenFactor: case ISD::MERGE_VALUES: @@ -5114,7 +5269,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid ABS!"); if (OpOpcode == ISD::UNDEF) - return getUNDEF(VT); + return getConstant(0, DL, VT); break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && @@ -5182,6 +5337,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (Operand.getValueType().getScalarType() == MVT::i1) return getNOT(DL, Operand, Operand.getValueType()); break; + case ISD::VECREDUCE_ADD: + if (Operand.getValueType().getScalarType() == MVT::i1) + return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand); + break; case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: if (Operand.getValueType().getScalarType() == MVT::i1) @@ -5273,6 +5432,30 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, APInt C2Ext = C2.zext(FullWidth); return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth()); } + case ISD::AVGFLOORS: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.sext(FullWidth); + APInt C2Ext = C2.sext(FullWidth); + return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGFLOORU: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.zext(FullWidth); + APInt C2Ext = C2.zext(FullWidth); + return (C1Ext + C2Ext).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGCEILS: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.sext(FullWidth); + APInt C2Ext = C2.sext(FullWidth); + return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); + } + case ISD::AVGCEILU: { + unsigned FullWidth = C1.getBitWidth() + 1; + APInt C1Ext = C1.zext(FullWidth); + APInt C2Ext = C2.zext(FullWidth); + return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); + } } return llvm::None; } @@ -5355,7 +5538,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (!FoldAttempt) return SDValue(); - SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); + SDValue Folded = getConstant(*FoldAttempt, DL, VT); assert((!Folded || !VT.isVector()) && "Can't fold vectors ops with scalar operands"); return Folded; @@ -5400,7 +5583,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); if (!Fold) break; - RawBits.push_back(Fold.getValue()); + RawBits.push_back(*Fold); } if (RawBits.size() == NumElts.getFixedValue()) { // We have constant folded, but we need to cast this again back to @@ -5416,7 +5599,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { if (DstUndefs[I]) continue; - Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT); + Ops[I] = getConstant(DstBits[I].sext(BVEltBits), DL, BVEltVT); } return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); } @@ -5455,9 +5638,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, !llvm::all_of(Ops, IsScalarOrSameVectorSize)) 
return SDValue(); - // If we are comparing vectors, then the result needs to be a i1 boolean - // that is then sign-extended back to the legal result type. + // If we are comparing vectors, then the result needs to be a i1 boolean that + // is then extended back to the legal result type depending on how booleans + // are represented. EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + ISD::NodeType ExtendCode = + (Opcode == ISD::SETCC && SVT != VT.getScalarType()) + ? TargetLowering::getExtendForContent(TLI->getBooleanContents(VT)) + : ISD::SIGN_EXTEND; // Find legal integer scalar type for constant promotion and // ensure that its scalar size is at least as large as source. @@ -5515,7 +5703,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // Legalize the (integer) scalar constant if necessary. if (LegalSVT != SVT) - ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + ScalarResult = getNode(ExtendCode, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && @@ -5639,20 +5827,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(Opcode, DL, VT, N1, N2, Flags); } +void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, + SDValue &N2) const { + if (!TLI->isCommutativeBinOp(Opcode)) + return; + + // Canonicalize: + // binop(const, nonconst) -> binop(nonconst, const) + bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); + bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); + bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); + bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + std::swap(N1, N2); + + // Canonicalize: + // binop(splat(x), step_vector) -> binop(step_vector, splat(x)) + else if (N1.getOpcode() == ISD::SPLAT_VECTOR && + N2.getOpcode() == ISD::STEP_VECTOR) + std::swap(N1, N2); +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { assert(N1.getOpcode() != ISD::DELETED_NODE && N2.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - // Canonicalize constant to RHS if commutative. 
- if (TLI->isCommutativeBinOp(Opcode)) { - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) - std::swap(N1, N2); - } + + canonicalizeCommutativeBinop(Opcode, N1, N2); auto *N1C = dyn_cast<ConstantSDNode>(N1); auto *N2C = dyn_cast<ConstantSDNode>(N2); @@ -5956,6 +6158,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { if (VT == N1.getOperand(1).getValueType()) return N1.getOperand(1); + if (VT.isFloatingPoint()) { + assert(VT.getSizeInBits() > N1.getOperand(1).getValueType().getSizeInBits()); + return getFPExtendOrRound(N1.getOperand(1), DL, VT); + } return getSExtOrTrunc(N1.getOperand(1), DL, VT); } return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); @@ -6053,9 +6259,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef + case ISD::SIGN_EXTEND_INREG: case ISD::UDIV: case ISD::SDIV: case ISD::UREM: @@ -6544,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign = NewAlign / 2; + NewAlign = NewAlign.previous(); if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. @@ -6792,17 +6998,18 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// \param Size Number of bytes to write. /// \param Alignment Alignment of the destination in bytes. /// \param isVol True if destination is volatile. +/// \param AlwaysInline Makes sure no function call is generated. /// \param DstPtrInfo IR information on the memory pointer. /// \returns New head in the control flow, if lowering was successful, empty /// SDValue otherwise. /// /// The function tries to replace 'llvm.memset' intrinsic with several store /// operations and value calculation code. This is usually profitable for small -/// memory size. +/// memory size or when the semantics require inlining. static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, Align Alignment, bool isVol, - MachinePointerInfo DstPtrInfo, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even if Src is undef. @@ -6822,8 +7029,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + unsigned Limit = AlwaysInline ?
~0 : TLI.getMaxStoresPerMemset(OptSize); + if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOps, Limit, MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); @@ -6974,10 +7183,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, } SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Src, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Emit a library call. @@ -7077,10 +7285,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, } SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Src, unsigned SrcAlign, - SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Src, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Emit a library call. @@ -7119,7 +7326,7 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool isTailCall, + bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Check to see if we should lower the memset to stores first. @@ -7132,7 +7339,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, DstPtrInfo, AAInfo); + isVol, false, DstPtrInfo, AAInfo); if (Result.getNode()) return Result; @@ -7142,45 +7349,75 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo); + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo); if (Result.getNode()) return Result; } + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, AAInfo); + assert(Result && + "getMemsetStores must return a valid sequence when AlwaysInline"); + return Result; + } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); // Emit a library call. 
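With the AlwaysInline flag threaded through above, the store-count limit passed to findOptimalMemOpLowering becomes effectively unbounded (~0), so the expansion into stores cannot be refused. Roughly, the expansion writes the splatted byte with a small number of wide stores; a simplified standalone model under that assumption (expandMemset and MaxStores are illustrative names, and the real code sizes each store via findOptimalMemOpLowering rather than fixed 8-byte chunks):

#include <cstddef>
#include <cstdint>
#include <cstring>

static bool expandMemset(uint8_t *Dst, uint8_t Val, size_t Size,
                         size_t MaxStores) {
  uint64_t Splat = 0x0101010101010101ull * Val; // byte splatted across 64 bits
  if ((Size + 7) / 8 > MaxStores)
    return false; // too many stores: fall back to the libcall path
  for (size_t I = 0; I < Size; I += 8) {
    size_t Chunk = Size - I < 8 ? Size - I : 8;
    std::memcpy(Dst + I, &Splat, Chunk); // one (possibly partial) wide store
  }
  return true;
}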
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); - Args.push_back(Entry); - Entry.Node = Src; - Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); - Args.push_back(Entry); - Entry.Node = Size; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Args.push_back(Entry); + auto &Ctx = *getContext(); + const auto& DL = getDataLayout(); - // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl) - .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) - .setDiscardResult() - .setTailCall(isTailCall); + // FIXME: pass in SDLoc + CLI.setDebugLoc(dl).setChain(Chain); + + ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src); + const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero(); + const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); + + // Helper function to create an Entry from Node and Type. + const auto CreateEntry = [](SDValue Node, Type *Ty) { + TargetLowering::ArgListEntry Entry; + Entry.Node = Node; + Entry.Ty = Ty; + return Entry; + }; - std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + // If zeroing out and bzero is present, use it. + if (SrcIsZero && BzeroName) { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee( + TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), + getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); + } else { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(DL)), + std::move(Args)); + } + + CLI.setDiscardResult().setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, - SDValue Dst, unsigned DstAlign, - SDValue Value, SDValue Size, Type *SizeTy, - unsigned ElemSz, bool isTailCall, + SDValue Dst, SDValue Value, SDValue Size, + Type *SizeTy, unsigned ElemSz, + bool isTailCall, MachinePointerInfo DstPtrInfo) { // Emit a library call. 
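The rewritten libcall emission just above chooses between two call shapes: when the stored value folds to constant zero and the target names an RTLIB::BZERO symbol, it emits the two-argument bzero(dst, n) and drops the value operand entirely; otherwise it emits the usual memset(dst, c, n). In plain C terms (lowerMemsetModel is an illustrative name; HaveBzero stands in for the BzeroName check):

#include <cstddef>
#include <cstring>
#include <strings.h> // bzero (POSIX)

static void lowerMemsetModel(void *Dst, int Src, std::size_t N,
                             bool HaveBzero) {
  if (Src == 0 && HaveBzero)
    bzero(Dst, N);       // no value argument to materialize
  else
    memset(Dst, Src, N); // generic three-argument form
}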
TargetLowering::ArgListTy Args; @@ -7224,6 +7461,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void* IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -7336,6 +7574,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( Opcode, dl.getIROrder(), VTList, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -7508,6 +7747,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -7609,6 +7849,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -7675,6 +7916,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -7703,6 +7945,7 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); @@ -7760,6 +8003,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPLoadSDNode>(E)->refineAlignment(MMO); @@ -7852,6 +8096,7 @@ SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPStoreSDNode>(E)->refineAlignment(MMO); @@ -7922,6 +8167,7 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + 
ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPStoreSDNode>(E)->refineAlignment(MMO); @@ -7952,6 +8198,7 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + ID.AddInteger(ST->getMemOperand()->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); @@ -7968,6 +8215,259 @@ SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, return V; } +SDValue SelectionDAG::getStridedLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + + uint64_t Size = MemoryLocation::UnknownSize; + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); + return getStridedLoadVP(AM, ExtType, VT, DL, Chain, Ptr, Offset, Stride, Mask, + EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getStridedLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, + SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding) { + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + + SDValue Ops[] = {Chain, Ptr, Offset, Stride, Mask, EVL}; + SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_LOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStridedLoadSDNode>( + DL.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast<VPStridedLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = + newSDNode<VPStridedLoadSDNode>(DL.getIROrder(), DL.getDebugLoc(), VTs, AM, + ExtType, IsExpanding, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getStridedLoadVP( + EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, + Undef, Stride, Mask, EVL, PtrInfo, VT, Alignment, + MMOFlags, AAInfo, Ranges, IsExpanding); +} + +SDValue SelectionDAG::getStridedLoadVP(EVT VT, const SDLoc &DL, SDValue Chain, + SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, DL, Chain, Ptr, + Undef, Stride, Mask, EVL, VT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getExtStridedLoadVP( + ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, + Stride, Mask, EVL, PtrInfo, MemVT, Alignment, + MMOFlags, AAInfo, nullptr, IsExpanding); +} + +SDValue SelectionDAG::getExtStridedLoadVP( + ISD::LoadExtType ExtType, const SDLoc &DL, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getStridedLoadVP(ISD::UNINDEXED, ExtType, VT, DL, Chain, Ptr, Undef, + Stride, Mask, EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getIndexedStridedLoadVP(SDValue OrigLoad, const SDLoc &DL, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *SLD = cast<VPStridedLoadSDNode>(OrigLoad); + assert(SLD->getOffset().isUndef() && + "Strided load is already an indexed load!"); + // Don't propagate the invariant or dereferenceable flags.
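All the getStridedLoadVP builders above create one node, EXPERIMENTAL_VP_STRIDED_LOAD, whose memory behavior is: lane i reads one element at Ptr + i * Stride (a byte-granular stride, matching the llvm.experimental.vp.strided.load intrinsic), and only lanes both below EVL and enabled by Mask are touched. A scalar reference model under those assumptions (inactive lanes are undefined in the real node; this sketch zero-fills them for determinism):

#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<int32_t> stridedLoadVP(const uint8_t *Ptr, int64_t Stride,
                                          const std::vector<bool> &Mask,
                                          unsigned EVL) {
  std::vector<int32_t> Res(Mask.size(), 0);
  for (unsigned I = 0; I < EVL && I < Mask.size(); ++I)
    if (Mask[I]) // active lane: load one element from Ptr + I*Stride
      std::memcpy(&Res[I], Ptr + (int64_t)I * Stride, sizeof(int32_t));
  return Res;
}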
+ auto MMOFlags = + SLD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); + return getStridedLoadVP( + AM, SLD->getExtensionType(), OrigLoad.getValueType(), DL, SLD->getChain(), + Base, Offset, SLD->getStride(), SLD->getMask(), SLD->getVectorLength(), + SLD->getPointerInfo(), SLD->getMemoryVT(), SLD->getAlign(), MMOFlags, + SLD->getAAInfo(), nullptr, SLD->isExpandingLoad()); +} + +SDValue SelectionDAG::getStridedStoreVP(SDValue Chain, const SDLoc &DL, + SDValue Val, SDValue Ptr, + SDValue Offset, SDValue Stride, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, + bool IsTruncating, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed vp_store with an offset!"); + SDVTList VTs = Indexed ? getVTList(Ptr.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Ptr, Offset, Stride, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( + DL.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), + VTs, AM, IsTruncating, + IsCompressing, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getTruncStridedStoreVP( + SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Stride, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT SVT, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemoryLocation::UnknownSize, Alignment, AAInfo); + return getTruncStridedStoreVP(Chain, DL, Val, Ptr, Stride, Mask, EVL, SVT, + MMO, IsCompressing); +} + +SDValue SelectionDAG::getTruncStridedStoreVP(SDValue Chain, const SDLoc &DL, + SDValue Val, SDValue Ptr, + SDValue Stride, SDValue Mask, + SDValue EVL, EVT SVT, + MachineMemOperand *MMO, + bool IsCompressing) { + EVT VT = Val.getValueType(); + + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + if (VT == SVT) + return getStridedStoreVP(Chain, DL, Val, Ptr, getUNDEF(Ptr.getValueType()), + Stride, Mask, EVL, VT, MMO, ISD::UNINDEXED, + /*IsTruncating*/ false, IsCompressing); + + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); + assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == SVT.getVectorElementCount()) && + "Cannot use 
trunc store to change the number of vector elements!"); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Stride, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStridedStoreSDNode>( + DL.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + cast<VPStridedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPStridedStoreSDNode>(DL.getIROrder(), DL.getDebugLoc(), + VTs, ISD::UNINDEXED, true, + IsCompressing, SVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedStridedStoreVP(SDValue OrigStore, + const SDLoc &DL, SDValue Base, + SDValue Offset, + ISD::MemIndexedMode AM) { + auto *SST = cast<VPStridedStoreSDNode>(OrigStore); + assert(SST->getOffset().isUndef() && + "Strided store is already an indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { + SST->getChain(), SST->getValue(), Base, Offset, SST->getStride(), + SST->getMask(), SST->getVectorLength()}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EXPERIMENTAL_VP_STRIDED_STORE, VTs, Ops); + ID.AddInteger(SST->getMemoryVT().getRawBits()); + ID.AddInteger(SST->getRawSubclassData()); + ID.AddInteger(SST->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<VPStridedStoreSDNode>( + DL.getIROrder(), DL.getDebugLoc(), VTs, AM, SST->isTruncatingStore(), + SST->isCompressingStore(), SST->getMemoryVT(), SST->getMemOperand()); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType) { @@ -7979,6 +8479,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPGatherSDNode>(E)->refineAlignment(MMO); @@ -8022,6 +8523,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<VPScatterSDNode>(E)->refineAlignment(MMO); @@ -8071,6 +8573,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); @@ -8118,6 
+8621,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); @@ -8159,13 +8663,13 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( dl.getIROrder(), VTs, MemVT, MMO, IndexType, ExtTy)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, MMO, IndexType, ExtTy); createOperands(N, Ops); @@ -8206,13 +8710,13 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( dl.getIROrder(), VTs, MemVT, MMO, IndexType, IsTrunc)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(MMO->getFlags()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - IndexType = TLI->getCanonicalIndexType(IndexType, MemVT, Ops[4]); auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, MemVT, MMO, IndexType, IsTrunc); createOperands(N, Ops); @@ -8410,6 +8914,41 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Ops[2].getValueType() == Ops[3].getValueType() && "LHS/RHS of comparison should match types!"); break; + case ISD::VP_ADD: + case ISD::VP_SUB: + // If it is VP_ADD/VP_SUB mask operation then turn it to VP_XOR + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + Opcode = ISD::VP_XOR; + break; + case ISD::VP_MUL: + // If it is VP_MUL mask operation then turn it to VP_AND + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) + Opcode = ISD::VP_AND; + break; + case ISD::VP_REDUCE_MUL: + // If it is VP_REDUCE_MUL mask operation then turn it to VP_REDUCE_AND + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_AND; + break; + case ISD::VP_REDUCE_ADD: + // If it is VP_REDUCE_ADD mask operation then turn it to VP_REDUCE_XOR + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_XOR; + break; + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_UMIN: + // If it is VP_REDUCE_SMAX/VP_REDUCE_UMIN mask operation then turn it to + // VP_REDUCE_AND. + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_AND; + break; + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + // If it is VP_REDUCE_SMIN/VP_REDUCE_UMAX mask operation then turn it to + // VP_REDUCE_OR. + if (VT == MVT::i1) + Opcode = ISD::VP_REDUCE_OR; + break; } // Memoize nodes. 
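The VP_* remappings above are pure boolean algebra on i1 lanes: addition mod 2 is parity (XOR), multiplication is AND, and since i1 '1' reads as -1 under a signed interpretation, signed-max/unsigned-min reduce to AND while signed-min/unsigned-max reduce to OR. A quick standalone check of the add/xor case:

#include <cassert>
#include <initializer_list>

static bool reduceXor(std::initializer_list<bool> Vals) {
  bool R = false;
  for (bool B : Vals)
    R ^= B; // parity accumulator
  return R;
}

int main() {
  // Sum of {1,1,0} is 2, which is 0 mod 2 -- the same as the XOR reduction.
  assert(reduceXor({true, true, false}) == (((1 + 1 + 0) % 2) != 0));
}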
@@ -8456,7 +8995,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops); + return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #ifndef NDEBUG for (auto &Op : Ops) @@ -9669,19 +10208,36 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ namespace { - /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith - /// to record information about a use. - struct UseMemo { - SDNode *User; - unsigned Index; - SDUse *Use; - }; +/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith +/// to record information about a use. +struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; +}; - /// operator< - Sort Memos by User. - bool operator<(const UseMemo &L, const UseMemo &R) { - return (intptr_t)L.User < (intptr_t)R.User; +/// operator< - Sort Memos by User. +bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; +} + +/// RAUOVWUpdateListener - Helper for ReplaceAllUsesOfValuesWith - When the node +/// pointed to by a UseMemo is deleted, set the User to nullptr to indicate that +/// the node already has been taken care of recursively. +class RAUOVWUpdateListener : public SelectionDAG::DAGUpdateListener { + SmallVector<UseMemo, 4> &Uses; + + void NodeDeleted(SDNode *N, SDNode *E) override { + for (UseMemo &Memo : Uses) + if (Memo.User == N) + Memo.User = nullptr; } +public: + RAUOVWUpdateListener(SelectionDAG &d, SmallVector<UseMemo, 4> &uses) + : SelectionDAG::DAGUpdateListener(d), Uses(uses) {} +}; + } // end anonymous namespace bool SelectionDAG::calculateDivergence(SDNode *N) { @@ -9773,12 +10329,19 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, // Sort the uses, so that all the uses from a given User are together. llvm::sort(Uses); + RAUOVWUpdateListener Listener(*this, Uses); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { // We know that this user uses some value of From. If it is the right // value, update it. SDNode *User = Uses[UseIndex].User; + // If the node has been deleted by recursive CSE updates when updating + // another node, then just skip this entry. + if (User == nullptr) { + ++UseIndex; + continue; + } // This node is about to morph, remove its old self from the CSE maps. RemoveNodeFromCSEMaps(User); @@ -9975,6 +10538,11 @@ bool llvm::isOneConstant(SDValue V) { return Const != nullptr && Const->isOne(); } +bool llvm::isMinSignedConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isMinSignedValue(); +} + SDValue llvm::peekThroughBitcasts(SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); @@ -10105,10 +10673,9 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { } bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { - // TODO: may want to use peekThroughBitcast() here. 
- unsigned BitWidth = N.getScalarValueSizeInBits(); - ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); - return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth; + ConstantSDNode *C = + isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation*/ true); + return C && C->isOne(); } bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { @@ -10957,9 +11524,8 @@ bool BuildVectorSDNode::getConstantRawBits( auto *CInt = dyn_cast<ConstantSDNode>(Op); auto *CFP = dyn_cast<ConstantFPSDNode>(Op); assert((CInt || CFP) && "Unknown constant"); - SrcBitElements[I] = - CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) - : CFP->getValueAPF().bitcastToAPInt(); + SrcBitElements[I] = CInt ? CInt->getAPIntValue().trunc(SrcEltSizeInBits) + : CFP->getValueAPF().bitcastToAPInt(); } // Recast to dst width. @@ -11078,6 +11644,10 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const { if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) return N.getNode(); + if ((N.getOpcode() == ISD::SPLAT_VECTOR) && + isa<ConstantFPSDNode>(N.getOperand(0))) + return N.getNode(); + return nullptr; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 6d8252046501..d236433f6fb4 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -96,7 +96,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode())) return false; int64_t PtrDiff; - if (NumBytes0.hasValue() && NumBytes1.hasValue() && + if (NumBytes0 && NumBytes1 && BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) { // If the size of memory access is unknown, do not use it for analysis.
// One example of unknown size memory access is to load/store scalable diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c61716ba1676..37d05cdba76d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -24,25 +24,21 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" @@ -89,7 +85,6 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -102,10 +97,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/Local.h" #include <cstddef> -#include <cstring> #include <iterator> #include <limits> -#include <numeric> #include <tuple> using namespace llvm; @@ -224,10 +217,10 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); - Hi = - DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueSizeInBits(), DL, - TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueSizeInBits(), DL, + TLI.getShiftAmountTy( + TotalVT, DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } @@ -276,7 +269,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. 
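Earlier in this hunk, getCopyFromParts reassembles a wide value from two parts by shifting the high part left by the low part's width and ORing in the zero-extended low part; the diff only changes the type of the shift-amount constant from the pointer type to getShiftAmountTy. The identity being relied on, in plain integers:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Lo = 0xDDCCBBAA, Hi = 0x44332211;
  // Hi placed above Lo: shift by Lo's bit width, then OR in zext(Lo).
  uint64_t Val = ((uint64_t)Hi << 32) | (uint64_t)Lo;
  assert(Val == 0x44332211DDCCBBAAull);
}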
- if (AssertOp.hasValue()) + if (AssertOp) Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); @@ -330,7 +323,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, Optional<CallingConv::ID> CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); - const bool IsABIRegCopy = CallConv.hasValue(); + const bool IsABIRegCopy = CallConv.has_value(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; @@ -344,7 +337,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT, + *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, RegisterVT); } else { NumRegs = @@ -566,7 +559,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, - DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false)); + DAG.getShiftAmountConstant(RoundBits, ValueVT, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V, CallConv); @@ -654,7 +647,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const bool IsABIRegCopy = CallConv.hasValue(); + const bool IsABIRegCopy = CallConv.has_value(); if (NumParts == 1) { EVT PartEVT = PartVT; @@ -733,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, DestEltCnt = ElementCount::getFixed(NumIntermediates); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); + *DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt); if (ValueVT == BuiltVectorTy) { // Nothing to do. @@ -926,10 +919,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, CallConv.getValue(), RegVTs[Value]) : RegVTs[Value]; - // We need to zero extend constants that are liveout to match assumptions - // in FunctionLoweringInfo::ComputePHILiveOutRegInfo. - if (ExtendKind == ISD::ANY_EXTEND && - (TLI.isZExtFree(Val, RegisterVT) || isa<ConstantSDNode>(Val))) + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], @@ -1239,7 +1229,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, // in the first place we should not be more successful here). Unless we // have some test case that proves this to be correct we should avoid // calling EmitFuncArgumentDbgValue here. - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, + FuncArgumentDbgValueKind::Value, Val)) { LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); @@ -1370,7 +1361,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, N = UnusedArgNodeMap[V]; if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now.
- if (!IsVariadic && EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N)) + if (!IsVariadic && + EmitFuncArgumentDbgValue(V, Var, Expr, dl, + FuncArgumentDbgValueKind::Value, N)) return true; if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can @@ -1642,7 +1635,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); - } else if (isa<ConstantAggregateZero>(C)) { + } + + if (isa<ConstantAggregateZero>(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); @@ -1654,12 +1649,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ScalableVectorType>(VecTy)) return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); - else { - SmallVector<SDValue, 16> Ops; - Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); - return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); - } + + SmallVector<SDValue, 16> Ops; + Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } + llvm_unreachable("Unknown vector constant"); } @@ -1683,11 +1678,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } - if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) { + if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) return DAG.getMDNode(cast<MDNode>(MD->getMetadata())); - } + if (const auto *BB = dyn_cast<BasicBlock>(V)) return DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + llvm_unreachable("Can't get register for value!"); } @@ -2751,10 +2747,10 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, None, CallOptions, getCurSDLoc()).second; - // On PS4, the "return address" must still be within the calling function, - // even if it's at the very end, so emit an explicit TRAP here. + // On PS4/PS5, the "return address" must still be within the calling + // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. - if (TM.getTargetTriple().isPS4CPU()) + if (TM.getTargetTriple().isPS()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); // WebAssembly needs an unreachable instruction after a non-returning call, // because the function return type can be different from __stack_chk_fail's @@ -3153,26 +3149,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( Op1.getValueType(), DAG.getDataLayout()); - // Coerce the shift amount to the right type if we can. + // Coerce the shift amount to the right type if we can. This exposes the + // truncate or zext to optimization early. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { - unsigned ShiftSize = ShiftTy.getSizeInBits(); - unsigned Op2Size = Op2.getValueSizeInBits(); - SDLoc DL = getCurSDLoc(); - - // If the operand is smaller than the shift count type, promote it. - if (ShiftSize > Op2Size) - Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); - - // If the operand is larger than the shift count type but the shift - // count type has enough bits to represent any shift value, truncate - // it now. 
This is a common case and it exposes the truncate to - // optimization early. - else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits())) - Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); - // Otherwise we'll need to temporarily settle for some other convenient - // type. Type legalization will make adjustments once the shiftee is split. - else - Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); + assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) && + "Unexpected shift type"); + Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy); } bool nuw = false; @@ -3819,13 +3801,8 @@ void SelectionDAGBuilder::visitInsertValue(const User &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const User &I) { - ArrayRef<unsigned> Indices; - if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) - Indices = EV->getIndices(); - else - Indices = cast<ConstantExpr>(&I)->getIndices(); - +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { + ArrayRef<unsigned> Indices = I.getIndices(); const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); @@ -4379,7 +4356,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // In all other cases the function returns 'false'. static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB, const BasicBlock *CurBB) { + SelectionDAGBuilder *SDB, const BasicBlock *CurBB, + uint64_t ElemSize) { SelectionDAG& DAG = SDB->DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); @@ -4419,9 +4397,16 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - Scale = DAG.getTargetConstant( - DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + + // MGATHER/MSCATTER are only required to support scaling by one or by the + // element size. Other scales may be produced using target-specific DAG + // combines. 
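The new ElemSize parameter enforces the comment above: the generic MGATHER/MSCATTER nodes only guarantee scales of 1 or the element size, so getUniformBase now refuses any GEP implying a different scale instead of encoding it. Lane addressing under that restriction, as a sketch (names are illustrative):

#include <cstdint>

// addr(i) = Base + Index[i] * Scale, with Scale restricted as above.
static bool isLegalGatherScale(uint64_t Scale, uint64_t ElemSize) {
  return Scale == 1 || Scale == ElemSize;
}

static const uint8_t *gatherLaneAddr(const uint8_t *Base, int64_t Index,
                                     uint64_t Scale) {
  return Base + Index * (int64_t)Scale;
}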
+ uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); + if (ScaleVal != ElemSize && ScaleVal != 1) + return false; + + Scale = + DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4435,7 +4420,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { EVT VT = Src0.getValueType(); Align Alignment = cast<ConstantInt>(I.getArgOperand(2)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT.getScalarType())); + .value_or(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Base; @@ -4443,7 +4428,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, - I.getParent()); + I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -4454,7 +4439,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } @@ -4541,7 +4526,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Align Alignment = cast<ConstantInt>(I.getArgOperand(1)) ->getMaybeAlignValue() - .getValueOr(DAG.getEVTAlign(VT.getScalarType())); + .value_or(DAG.getEVTAlign(VT.getScalarType())); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); @@ -4551,7 +4536,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ISD::MemIndexType IndexType; SDValue Scale; bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, - I.getParent()); + I.getParent(), VT.getScalarStoreSize()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, @@ -4562,7 +4547,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } @@ -4681,7 +4666,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType()); if (!TLI.supportsUnalignedAtomics() && - I.getAlignment() < MemVT.getSizeInBits() / 8) + I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); @@ -4733,7 +4718,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < MemVT.getSizeInBits() / 8) + if (I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4784,7 +4769,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } } - // Info is set by getTgtMemInstrinsic + // Info is set 
by getTgtMemIntrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, @@ -4898,7 +4883,8 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(23, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); @@ -4923,10 +4909,11 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); // IntegerPartOfX <<= 23; - IntegerPartOfX = DAG.getNode( - ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( - DAG.getDataLayout()))); + IntegerPartOfX = + DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, + DAG.getTargetLoweringInfo().getShiftAmountTy( + MVT::i32, DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -5354,38 +5341,36 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { - // If RHS is a constant, we can expand this out to a multiplication tree, - // otherwise we end up lowering to a call to __powidf2 (for example). When - // optimizing for size, we only want to do this if the expansion would produce - // a small number of multiplies, otherwise we do the full expansion. + // If RHS is a constant, we can expand this out to a multiplication tree if + // it's beneficial on the target, otherwise we end up lowering to a call to + // __powidf2 (for example). if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { - // Get the exponent as a positive value. unsigned Val = RHSC->getSExtValue(); - if ((int)Val < 0) Val = -Val; // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - bool OptForSize = DAG.shouldOptForSize(); - if (!OptForSize || - // If optimizing for size, don't insert too many multiplies. - // This inserts up to 5 multiplies. - countPopulation(Val) + Log2_32(Val) < 7) { + if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI( + Val, DAG.shouldOptForSize())) { + // Get the exponent as a positive value. + if ((int)Val < 0) + Val = -Val; // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. - SDValue Res; // Logically starts equal to 1.0 + SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) - Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + Res = + DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare); else - Res = CurSquare; // 1.0*CurSquare. + Res = CurSquare; // 1.0*CurSquare. 
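
// Aside: the expansion above, re-modeled as standalone C++ on plain doubles
// (a sketch of the same binary decomposition, not the SelectionDAG code).
// Walking the exponent bit by bit costs O(log n) multiplies:
#include <cassert>

static double powi_expand(double X, unsigned Val) {
  double Res = 1.0;      // Logically starts equal to 1.0.
  double CurSquare = X;  // Holds X^(2^k) on the k-th iteration.
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;  // Fold this power of two into the result.
    CurSquare *= CurSquare;
    Val >>= 1;
  }
  return Res;
}

int main() {
  assert(powi_expand(2.0, 10) == 1024.0);
  assert(powi_expand(5.0, 0) == 1.0); // powi(x, 0) -> 1.0
}
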
} CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), @@ -5506,7 +5491,7 @@ getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs, /// appear for function arguments or in the prologue. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, bool IsDbgDeclare, const SDValue &N) { + DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -5540,7 +5525,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } }; - if (!IsDbgDeclare) { + if (Kind == FuncArgumentDbgValueKind::Value) { // ArgDbgValues are hoisted to the beginning of the entry block. So we // should only emit as ArgDbgValue if the dbg.value intrinsic is found in // the entry block. @@ -5627,7 +5612,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); - IsIndirect = IsDbgDeclare; + IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } } @@ -5675,7 +5660,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( continue; } MachineInstr *NewMI = - MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, IsDbgDeclare); + MakeVRegDbgValue(RegAndSize.first, *FragmentExpr, + Kind != FuncArgumentDbgValueKind::Value); FuncInfo.ArgDbgValues.push_back(NewMI); } }; @@ -5693,7 +5679,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); - IsIndirect = IsDbgDeclare; + IsIndirect = Kind != FuncArgumentDbgValueKind::Value; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. @@ -5715,6 +5701,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op, Variable, Expr); + // Otherwise, use ArgDbgValues. FuncInfo.ArgDbgValues.push_back(NewMI); return true; } @@ -5820,16 +5807,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::vacopy: visitVACopy(I); return; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getValueType(DAG.getDataLayout(), I.getType()), getValue(I.getArgOperand(0)))); return; case Intrinsic::addressofreturnaddress: - setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getNode(ISD::ADDROFRETURNADDR, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::sponentry: - setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getFrameIndexTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getNode(ISD::SPONENTRY, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, @@ -5867,7 +5856,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
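
// Aside: a standalone sketch of why commonAlignment() could be replaced by
// std::min here. Alignments are powers of two, so the strongest guarantee
// that holds for both pointers is simply the smaller value:
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t DstAlign = 16, SrcAlign = 4;
  // Every address aligned to 16 is also aligned to 4, so 4 is the best
  // common alignment the combined memcpy lowering may assume.
  assert(std::min(DstAlign, SrcAlign) == 4);
}
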
Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG @@ -5890,7 +5879,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. Align DstAlign = MCI.getDestAlign().valueOrOne(); Align SrcAlign = MCI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG @@ -5913,10 +5902,28 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, + SDValue MS = DAG.getMemset( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, + isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); + updateDAGForMaybeTailCall(MS); + return; + } + case Intrinsic::memset_inline: { + const auto &MSII = cast<MemSetInlineInst>(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Value = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size"); + // @llvm.memset defines 0 and 1 to both mean no alignment. + Align DstAlign = MSII.getDestAlign().valueOrOne(); + bool isVol = MSII.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); - updateDAGForMaybeTailCall(MS); + updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memmove: { @@ -5927,7 +5934,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // @llvm.memmove defines 0 and 1 to both mean no alignment. 
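
// Aside: a source-level sketch (not LLVM API code) of what the new
// @llvm.memset.inline contract means: the length is a compile-time
// constant, and lowering must expand the fill inline (the AlwaysInline
// flag above) rather than ever emitting a libcall.
#include <cstddef>
#include <cstdint>

template <size_t N>
static void memset_inline_model(uint8_t *Dst, uint8_t Value) {
  // N is a constant, so the loop can be expanded to straight-line stores.
  for (size_t I = 0; I != N; ++I)
    Dst[I] = Value;
}

int main() {
  uint8_t Buf[32];
  memset_inline_model<sizeof(Buf)>(Buf, 0xAA);
  return Buf[0] == 0xAA ? 0 : 1;
}
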
Align DstAlign = MMI.getDestAlign().valueOrOne(); Align SrcAlign = MMI.getSourceAlign().valueOrOne(); - Align Alignment = commonAlignment(DstAlign, SrcAlign); + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG @@ -5946,15 +5953,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); - unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, - SrcAlign, Length, LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest()), - MachinePointerInfo(MI.getRawSource())); + SDValue MC = + DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } @@ -5964,15 +5969,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); - unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, - SrcAlign, Length, LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest()), - MachinePointerInfo(MI.getRawSource())); + SDValue MC = + DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); return; } @@ -5982,13 +5985,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); - unsigned DstAlign = MI.getDestAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); - SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, - LengthTy, ElemSz, isTC, - MachinePointerInfo(MI.getRawDest())); + SDValue MC = + DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz, + isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); return; } @@ -6088,7 +6090,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, + FuncArgumentDbgValueKind::Declare, N); return; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), @@ -6098,8 +6101,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
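
// Aside: a minimal model of the refactor from the bool IsDbgDeclare to the
// three-way FuncArgumentDbgValueKind (sketch only). dbg.addr and
// dbg.declare both describe an address, so indirection now follows from
// "anything but Value", exactly as the hunks above test:
enum class FuncArgumentDbgValueKind {
  Value,   // Originally a llvm.dbg.value.
  Addr,    // Originally a llvm.dbg.addr.
  Declare, // Originally a llvm.dbg.declare.
};

static constexpr bool isIndirectLocation(FuncArgumentDbgValueKind Kind) {
  return Kind != FuncArgumentDbgValueKind::Value;
}

static_assert(isIndirectLocation(FuncArgumentDbgValueKind::Addr),
              "dbg.addr locations are indirect");
static_assert(!isIndirectLocation(FuncArgumentDbgValueKind::Value),
              "dbg.value locations are direct");
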
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, - N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, + FuncArgumentDbgValueKind::Declare, N)) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << " (could not emit func-arg dbg_value)\n"); } @@ -6165,8 +6168,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); - assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0)); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); @@ -6346,6 +6348,29 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, #include "llvm/IR/VPIntrinsics.def" visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I)); return; + case Intrinsic::fptrunc_round: { + // Get the last argument, the metadata and convert it to an integer in the + // call + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata(); + Optional<RoundingMode> RoundMode = + convertStrToRoundingMode(cast<MDString>(MD)->getString()); + + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + // Propagate fast-math-flags from IR to node(s). + SDNodeFlags Flags; + Flags.copyFMF(*cast<FPMathOperator>(&I)); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); + + SDValue Result; + Result = DAG.getNode( + ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)), + DAG.getTargetConstant((int)*RoundMode, sdl, + TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, Result); + + return; + } case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -6400,6 +6425,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Res); DAG.setRoot(Res.getValue(0)); return; + case Intrinsic::is_fpclass: { + const DataLayout DLayout = DAG.getDataLayout(); + EVT DestVT = TLI.getValueType(DLayout, I.getType()); + EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType()); + unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + MachineFunction &MF = DAG.getMachineFunction(); + const Function &F = MF.getFunction(); + SDValue Op = getValue(I.getArgOperand(0)); + SDNodeFlags Flags; + Flags.setNoFPExcept( + !F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP)); + // If ISD::IS_FPCLASS should be expanded, do it right now, because the + // expansion can use illegal types. Making expansion early allows + // legalizing these types prior to selection. + if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) { + SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG); + setValue(&I, Result); + return; + } + + SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32); + SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags); + setValue(&I, V); + return; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); @@ -6846,7 +6896,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::invariant_start: // Discard region information. 
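
// Aside: a portable sketch of what llvm.is.fpclass computes -- membership
// of a value in a set of FP classes, tested without raising FP exceptions.
// The mask layout below is invented for the example; only the shape of the
// "classify, then AND with the test mask" scheme is the point:
#include <cassert>
#include <cmath>

enum FPClassBits : unsigned {
  fcNan = 1u << 0, fcInf = 1u << 1, fcNormal = 1u << 2,
  fcSubnormal = 1u << 3, fcZero = 1u << 4, // Hypothetical layout.
};

static unsigned classify(double X) {
  switch (std::fpclassify(X)) {
  case FP_NAN:       return fcNan;
  case FP_INFINITE:  return fcInf;
  case FP_SUBNORMAL: return fcSubnormal;
  case FP_ZERO:      return fcZero;
  default:           return fcNormal;
  }
}

static bool is_fpclass_model(double X, unsigned Test) {
  return (classify(X) & Test) != 0;
}

int main() {
  assert(is_fpclass_model(0.0, fcZero | fcSubnormal)); // "zero or denormal?"
  assert(!is_fpclass_model(1.0, fcNan | fcInf));
}
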
- setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, + DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType()))); return; case Intrinsic::invariant_end: // Discard region information. @@ -7150,7 +7201,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, SetCC); return; } - case Intrinsic::experimental_vector_insert: { + case Intrinsic::vector_insert: { SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); SDValue Index = getValue(I.getOperand(2)); @@ -7167,7 +7218,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Index)); return; } - case Intrinsic::experimental_vector_extract: { + case Intrinsic::vector_extract: { SDValue Vec = getValue(I.getOperand(0)); SDValue Index = getValue(I.getOperand(1)); EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -7245,7 +7296,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( }; SDVTList VTs = DAG.getVTList(ValueVTs); - fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) @@ -7310,13 +7361,14 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { Optional<unsigned> ResOPC; switch (VPIntrin.getIntrinsicID()) { -#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: -#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD; -#define END_REGISTER_VP_INTRINSIC(VPID) break; +#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ + case Intrinsic::VPID: \ + ResOPC = ISD::VPSD; \ + break; #include "llvm/IR/VPIntrinsics.def" } - if (!ResOPC.hasValue()) + if (!ResOPC) llvm_unreachable( "Inconsistency: no SDNode available for this VPIntrinsic!"); @@ -7327,7 +7379,7 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { : ISD::VP_REDUCE_FMUL; } - return ResOPC.getValue(); + return *ResOPC; } void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, @@ -7365,11 +7417,12 @@ void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent()); + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } @@ -7421,11 +7474,12 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, SDValue Base, Index, Scale; ISD::MemIndexType IndexType; bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent()); + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); if (!UniformBase) { Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_UNSCALED; + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); } @@ -7444,18 +7498,104 @@ void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, setValue(&VPIntrin, ST); } +void SelectionDAGBuilder::visitVPStridedLoad( + const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) { + SDLoc DL = 
getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + + SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], + OpValues[2], OpValues[3], MMO, + false /*IsExpanding*/); + + if (AddToChain) + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStridedStore( + const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) { + SDLoc DL = getCurSDLoc(); + Value *PtrOperand = VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + + SDValue ST = DAG.getStridedStoreVP( + getMemoryRoot(), DL, OpValues[0], OpValues[1], + DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3], + OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false, + /*IsCompressing*/ false); + + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + +void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL = getCurSDLoc(); + + ISD::CondCode Condition; + CmpInst::Predicate CondCode = VPIntrin.getPredicate(); + bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy(); + if (IsFP) { + // FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan) + // flags, but calls that don't return floating-point types can't be + // FPMathOperators, like vp.fcmp. This affects constrained fcmp too. 
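
// Aside: a standalone sketch of why getFCmpCodeWithoutNaN is sound under
// NoNaNsFPMath. An "ordered" predicate like olt is less-than plus an
// implicit "neither operand is NaN"; once NaNs are excluded, the NaN test
// is dead and the bare setlt comparison remains:
#include <cassert>
#include <cmath>

static bool fcmp_olt(double A, double B) {
  return !std::isnan(A) && !std::isnan(B) && A < B; // ISD::SETOLT
}
static bool fcmp_lt(double A, double B) { return A < B; } // ISD::SETLT

int main() {
  // On NaN-free inputs the two predicates agree, so the backend is free to
  // pick whichever compare is cheaper.
  assert(fcmp_olt(1.0, 2.0) == fcmp_lt(1.0, 2.0));
  assert(fcmp_olt(3.0, 2.0) == fcmp_lt(3.0, 2.0));
}
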
+ Condition = getFCmpCondCode(CondCode); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + } else { + Condition = getICmpCondCode(CondCode); + } + + SDValue Op1 = getValue(VPIntrin.getOperand(0)); + SDValue Op2 = getValue(VPIntrin.getOperand(1)); + // #2 is the condition code + SDValue MaskOp = getValue(VPIntrin.getOperand(3)); + SDValue EVL = getValue(VPIntrin.getOperand(4)); + MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); + assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && + "Unexpected target EVL type"); + EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL); + + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + VPIntrin.getType()); + setValue(&VPIntrin, + DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL)); +} + void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { SDLoc DL = getCurSDLoc(); unsigned Opcode = getISDForVPIntrinsic(VPIntrin); + auto IID = VPIntrin.getIntrinsicID(); + + if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin)) + return visitVPCmp(*CmpI); + SmallVector<EVT, 4> ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); - auto EVLParamPos = - VPIntrinsic::getVectorLengthParamPos(VPIntrin.getIntrinsicID()); + auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID); MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy(); assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) && @@ -7472,7 +7612,10 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( switch (Opcode) { default: { - SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin)) + SDFlags.copyFMF(*FPMO); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags); setValue(&VPIntrin, Result); break; } @@ -7481,10 +7624,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, Opcode == ISD::VP_GATHER); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues); + break; case ISD::VP_STORE: case ISD::VP_SCATTER: visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + visitVPStridedStore(VPIntrin, OpValues); + break; } } @@ -7759,7 +7908,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); - const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size)); if (CSize && CSize->getZExtValue() == 0) { EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType(), true); @@ -8280,7 +8429,7 @@ public: // accessed type. if (isIndirect) { OpTy = ParamElemType; - assert(OpTy && "Indirect opernad must have elementtype attribute"); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -8401,8 +8550,9 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SmallVector<unsigned, 4> Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - // No work to do for memory operations. 
- if (OpInfo.ConstraintType == TargetLowering::C_Memory) + // No work to do for memory/address operands. + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Address) return None; // If this is a constraint for a single physreg, or a constraint for a @@ -8582,7 +8732,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (OpInfo.hasArg()) { OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); - Type *ParamElemTy = Call.getAttributes().getParamElementType(ArgNo); + Type *ParamElemTy = Call.getParamElementType(ArgNo); EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout(), ParamElemTy); OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; @@ -8660,8 +8810,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); - if (OpInfo.ConstraintType == TargetLowering::C_Memory && - OpInfo.Type == InlineAsm::isClobber) + if ((OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) || + OpInfo.ConstraintType == TargetLowering::C_Address) continue; // If this is a memory input, and if the operand is not indirect, do what we @@ -8711,7 +8862,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, : OpInfo; const auto RegError = getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); - if (RegError.hasValue()) { + if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const char *RegName = TRI.getName(RegError.getValue()); @@ -8736,6 +8887,10 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } return false; }; + assert((OpInfo.ConstraintType != TargetLowering::C_Address || + (OpInfo.Type == InlineAsm::isInput && + !OpInfo.isMatchingInputConstraint())) && + "Only address as input operand is allowed."); switch (OpInfo.Type) { case InlineAsm::isOutput: @@ -8868,8 +9023,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } - if (OpInfo.ConstraintType == TargetLowering::C_Memory) { - assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Address) { + assert((OpInfo.isIndirect || + OpInfo.ConstraintType != TargetLowering::C_Memory) && + "Operand must be indirect to be a mem!"); assert(InOperandVal.getValueType() == TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); @@ -9007,6 +9165,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; case TargetLowering::C_Memory: break; // Already handled. + case TargetLowering::C_Address: + break; // Silence warning. 
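
// Aside: a compressed model of how the new C_Address constraint threads
// through these hunks (a sketch; the real enum lives in TargetLowering).
// Address constraints behave like memory constraints for register
// allocation -- neither consumes registers -- and, per the assert above,
// are only accepted as plain (non-matching) inputs:
enum class ConstraintType { Register, RegisterClass, Memory, Address, Other };

static constexpr bool needsRegistersForValue(ConstraintType CT) {
  return CT != ConstraintType::Memory && CT != ConstraintType::Address;
}

static_assert(!needsRegistersForValue(ConstraintType::Address),
              "address operands bypass register assignment");
static_assert(needsRegistersForValue(ConstraintType::RegisterClass),
              "register-class operands still get registers");
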
case TargetLowering::C_Unknown: assert(false && "Unexpected unknown constraint"); } @@ -9953,8 +10113,9 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); } -void -SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { +void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, + unsigned Reg, + ISD::NodeType ExtendType) { SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && @@ -9969,10 +10130,11 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { None); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); - ISD::NodeType ExtendType = ISD::ANY_EXTEND; - auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); - if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) - ExtendType = PreferredExtendIt->second; + if (ExtendType == ISD::ANY_EXTEND) { + auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); + if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) + ExtendType = PreferredExtendIt->second; + } RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -10545,6 +10707,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { /// the end. void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; @@ -10582,7 +10745,13 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C); - CopyValueToVirtualRegister(C, RegOut); + // We need to zero/sign extend ConstantInt phi operands to match + // assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo. + ISD::NodeType ExtendType = ISD::ANY_EXTEND; + if (auto *CI = dyn_cast<ConstantInt>(C)) + ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + CopyValueToVirtualRegister(C, RegOut, ExtendType); } Reg = RegOut; } else { @@ -10602,7 +10771,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index ea48042a5dcf..72cca3d9b001 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -284,7 +284,8 @@ public: return CurInst ? 
CurInst->getDebugLoc() : DebugLoc(); } - void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + void CopyValueToVirtualRegister(const Value *V, unsigned Reg, + ISD::NodeType ExtendType = ISD::ANY_EXTEND); void visit(const Instruction &I); @@ -527,7 +528,7 @@ private: void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const User &I); + void visitExtractValue(const ExtractValueInst &I); void visitInsertValue(const User &I); void visitLandingPad(const LandingPadInst &LP); @@ -570,6 +571,11 @@ private: SmallVector<SDValue, 7> &OpValues, bool IsGather); void visitVPStoreScatter(const VPIntrinsic &VPIntrin, SmallVector<SDValue, 7> &OpValues, bool IsScatter); + void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVectorImpl<SDValue> &OpValues); + void visitVPStridedStore(const VPIntrinsic &VPIntrin, + SmallVectorImpl<SDValue> &OpValues); + void visitVPCmp(const VPCmpIntrinsic &VPIntrin); void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); void visitVAStart(const CallInst &I); @@ -602,12 +608,22 @@ private: void emitInlineAsmError(const CallBase &Call, const Twine &Message); + /// An enum that states to emit func argument dbg value the kind of intrinsic + /// it originally had. This controls the internal behavior of + /// EmitFuncArgumentDbgValue. + enum class FuncArgumentDbgValueKind { + Value, // This was originally a llvm.dbg.value. + Addr, // This was originally a llvm.dbg.addr. + Declare, // This was originally a llvm.dbg.declare. + }; + /// If V is an function argument then create corresponding DBG_VALUE machine /// instruction for it now. At the end of instruction selection, they will be /// inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, - bool IsDbgDeclare, const SDValue &N); + FuncArgumentDbgValueKind Kind, + const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); @@ -673,9 +689,7 @@ struct RegsForValue { const DataLayout &DL, unsigned Reg, Type *Ty, Optional<CallingConv::ID> CC); - bool isABIMangled() const { - return CallConv.hasValue(); - } + bool isABIMangled() const { return CallConv.has_value(); } /// Add the specified values to this one. 
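
// Aside: several hunks in this diff are the mechanical llvm::Optional
// spelling migration (hasValue -> has_value, getValueOr -> value_or,
// getValue -> operator*), aligning the API with std::optional. A standalone
// sketch of the correspondence:
#include <cassert>
#include <optional>

int main() {
  std::optional<int> CallConv; // Stands in for Optional<CallingConv::ID>.
  assert(!CallConv.has_value());     // was: CallConv.hasValue()
  assert(CallConv.value_or(7) == 7); // was: CallConv.getValueOr(7)
  CallConv = 42;
  assert(*CallConv == 42);           // was: CallConv.getValue()
}
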
void append(const RegsForValue &RHS) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 77e9e53668f9..bbfc6e5ef64f 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -45,7 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" -#include "SDNodeDbgValue.h" #include <cstdint> #include <iterator> @@ -231,6 +230,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::MUL: return "mul"; case ISD::MULHU: return "mulhu"; case ISD::MULHS: return "mulhs"; + case ISD::AVGFLOORU: return "avgflooru"; + case ISD::AVGFLOORS: return "avgfloors"; + case ISD::AVGCEILU: return "avgceilu"; + case ISD::AVGCEILS: return "avgceils"; case ISD::ABDS: return "abds"; case ISD::ABDU: return "abdu"; case ISD::SDIV: return "sdiv"; @@ -267,6 +270,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FCANONICALIZE: return "fcanonicalize"; + case ISD::IS_FPCLASS: return "is_fpclass"; case ISD::FPOW: return "fpow"; case ISD::STRICT_FPOW: return "strict_fpow"; case ISD::SMIN: return "smin"; @@ -361,6 +365,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16"; + case ISD::BF16_TO_FP: return "bf16_to_fp"; + case ISD::FP_TO_BF16: return "fp_to_bf16"; case ISD::LROUND: return "lround"; case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; @@ -814,6 +820,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) { if (LN->hasOffset()) OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">"; + } else if (const auto *AA = dyn_cast<AssertAlignSDNode>(this)) { + OS << '<' << AA->getAlign().value() << '>'; } if (VerboseDAGDumping) { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b83a60129c78..2b63359c2b1b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -15,11 +15,9 @@ #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -29,6 +27,7 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include 
"llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -69,7 +68,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -82,7 +80,6 @@ #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" @@ -370,8 +367,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, // PHI. for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); - if (!CE || !CE->canTrap()) continue; + Constant *C = dyn_cast<Constant>(PN->getIncomingValue(i)); + if (!C || !C->canTrap()) continue; // The only case we have to worry about is when the edge is critical. // Since this block has a PHI Node, we assume it has multiple input @@ -709,6 +706,7 @@ static void reportFastISelFailure(MachineFunction &MF, report_fatal_error(Twine(R.getMsg())); ORE.emit(R); + LLVM_DEBUG(dbgs() << R.getMsg() << "\n"); } void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, @@ -1527,6 +1525,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { BeforeInst->hasOneUse() && FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { // If we succeeded, don't re-select the load. + LLVM_DEBUG(dbgs() + << "FastISel folded load: " << *BeforeInst << "\n"); BI = std::next(BasicBlock::const_iterator(BeforeInst)); --NumFastIselRemaining; ++NumFastIselSuccess; @@ -3272,6 +3272,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // If the chained node is not the root, we can't fold it if it has + // multiple uses. // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && @@ -3309,6 +3311,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + // If the chained node is not the root, we can't fold it if it has + // multiple uses. // FIXME: What if other value results of the node have uses not matched // by this pattern? if (ChainNodesMatched.back() != NodeToMatch && @@ -3447,12 +3451,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // such nodes must have a chain, it suffices to check ChainNodesMatched. // We need to perform this check before potentially modifying one of the // nodes via MorphNode. - bool MayRaiseFPException = false; - for (auto *N : ChainNodesMatched) - if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { - MayRaiseFPException = true; - break; - } + bool MayRaiseFPException = + llvm::any_of(ChainNodesMatched, [this](SDNode *N) { + return mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept(); + }); // Create the node. 
MachineSDNode *Res = nullptr; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index d022e2a23ea0..b66eeb6d2bb1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -13,15 +13,11 @@ #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "dag-printer" @@ -181,11 +177,11 @@ LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName, /// clearGraphAttrs - Clear all previously defined node graph attributes. /// Intended to be used from a debugging tool (eg. gdb). void SelectionDAG::clearGraphAttrs() { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs.clear(); #else - errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::clearGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } @@ -193,11 +189,11 @@ void SelectionDAG::clearGraphAttrs() { /// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) /// void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs[N] = Attrs; #else - errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } @@ -205,7 +201,7 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { /// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) /// Used from getNodeAttributes. std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS std::map<const SDNode *, std::string>::const_iterator I = NodeGraphAttrs.find(N); @@ -214,8 +210,8 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { else return ""; #else - errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::getGraphAttrs is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; return std::string(); #endif } @@ -223,11 +219,11 @@ std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { /// setGraphColor - Convenience for setting node color attribute. 
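
// Aside: the NDEBUG -> LLVM_ENABLE_ABI_BREAKING_CHECKS change below matters
// because NodeGraphAttrs is a data member: its presence must be decided by
// one macro that every translation unit sees consistently, or object files
// will disagree about the layout of SelectionDAG. A generic sketch of the
// pattern, using a hypothetical feature macro:
#include <map>
#include <string>

#define MYLIB_ABI_BREAKING_CHECKS 1 // In practice set by the build system.

struct Graph {
#if MYLIB_ABI_BREAKING_CHECKS
  std::map<int, std::string> NodeAttrs; // Exists only in checking builds.
#endif
  void setAttr(int Node, std::string Attrs) {
#if MYLIB_ABI_BREAKING_CHECKS
    NodeAttrs[Node] = std::move(Attrs);
#else
    (void)Node; (void)Attrs; // Storage compiled out; silently ignore.
#endif
  }
};

int main() {
  Graph G;
  G.setAttr(0, "color=red");
}
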
/// void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS NodeGraphAttrs[N] = std::string("color=") + Color; #else - errs() << "SelectionDAG::setGraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphColor is only available in builds with " + << "ABI breaking checks enabled on systems with Graphviz or gv!\n"; #endif } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index dfda7d8b9f81..19a52fde44c1 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,7 +17,10 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -27,6 +30,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -168,7 +172,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; - auto It = RelocationMap.find(Relocate->getDerivedPtr()); + auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) return None; @@ -192,10 +196,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, for (auto &IncomingValue : Phi->incoming_values()) { Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); - if (!SpillSlot.hasValue()) + if (!SpillSlot) return None; - if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + if (MergedResult && *MergedResult != *SpillSlot) return None; MergedResult = SpillSlot; @@ -276,7 +280,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, const int LookUpDepth = 6; Optional<int> Index = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); - if (!Index.hasValue()) + if (!Index) return; const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; @@ -526,14 +530,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, GCStrategy &S = GFI->getStrategy(); for (const Value *V : SI.Bases) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { + if (Opt) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : SI.Ptrs) { auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { + if (Opt) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } @@ -880,8 +884,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); - // For values lowered to tied-defs, create the virtual registers. Note that - // for simplicity, we *always* create a vreg even within a single block. + // For values lowered to tied-defs, create the virtual registers if used + // in other blocks. 
For local gc.relocate record appropriate statepoint + // result in StatepointLoweringState. DenseMap<SDValue, Register> VirtRegs; for (const auto *Relocate : SI.GCRelocates) { Value *Derived = Relocate->getDerivedPtr(); @@ -889,12 +894,23 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( if (!LowerAsVReg.count(SD)) continue; + SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]); + + // Handle local relocate. Note that different relocates might + // map to the same SDValue. + if (SI.StatepointInstr->getParent() == Relocate->getParent()) { + SDValue Res = StatepointLowering.getLocation(SD); + if (Res) + assert(Res == Relocated); + else + StatepointLowering.setLocation(SD, Relocated); + continue; + } + // Handle multiple gc.relocates of the same input efficiently. if (VirtRegs.count(SD)) continue; - SDValue Relocated = SDValue(StatepointMCNode, LowerAsVReg[SD]); - auto *RetTy = Relocate->getType(); Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), @@ -915,8 +931,13 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( SDValue SDV = getValue(V); SDValue Loc = StatepointLowering.getLocation(SDV); + bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent()); + RecordType Record; - if (LowerAsVReg.count(SDV)) { + if (IsLocal && LowerAsVReg.count(SDV)) { + // Result is already stored in StatepointLowering + Record.type = RecordType::SDValueNode; + } else if (LowerAsVReg.count(SDV)) { Record.type = RecordType::VReg; assert(VirtRegs.count(SDV)); Record.payload.Reg = VirtRegs[SDV]; @@ -932,7 +953,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( if (Relocate->getParent() != StatepointInstr->getParent()) ExportFromCurrentBlock(V); } - RelocationMap[V] = Record; + RelocationMap[Relocate] = Record; } @@ -1148,8 +1169,8 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes()); - SI.ID = SD.StatepointID.getValueOr(DefaultID); - SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); + SI.ID = SD.StatepointID.value_or(DefaultID); + SI.NumPatchBytes = SD.NumPatchBytes.value_or(0); SI.DeoptState = ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end()); @@ -1210,11 +1231,19 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; - auto SlotIt = RelocationMap.find(DerivedPtr); + auto SlotIt = RelocationMap.find(&Relocate); assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); const RecordType &Record = SlotIt->second; // If relocation was done via virtual register.. 
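
// Aside: a simplified model of the per-relocate record manipulated here
// (a sketch; field names are abridged, and the Spill/NoRelocate variants
// are assumed from context rather than quoted). The relocation map is now
// keyed by the gc.relocate instruction itself, and the new SDValueNode
// variant marks relocates local to the statepoint's block:
#include <cassert>

struct RelocRecord {
  enum class Kind {
    SDValueNode, // Local relocate: reuse the statepoint SDValue directly.
    VReg,        // Crosses blocks: value lives in a virtual register.
    Spill,       // Value was spilled; payload is a frame index.
    NoRelocate,  // No relocation required.
  } Type;
  union {
    unsigned Reg;   // Valid for Kind::VReg.
    int FrameIndex; // Valid for Kind::Spill.
  } Payload;
};

int main() {
  RelocRecord R;
  R.Type = RelocRecord::Kind::VReg;
  R.Payload.Reg = 5;
  // Only the union member matching the tag may be read.
  assert(R.Type == RelocRecord::Kind::VReg && R.Payload.Reg == 5);
}
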
+ if (Record.type == RecordType::SDValueNode) { + assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + "Nonlocal gc.relocate mapped via SDValue"); + SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr)); + assert(SDV.getNode() && "empty SDValue"); + setValue(&Relocate, SDV); + return; + } if (Record.type == RecordType::VReg) { Register InReg = Record.payload.Reg; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f6d1fa87676f..a6b471ea22b7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -13,13 +13,13 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -30,7 +30,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -94,6 +93,8 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, // (We look for a CopyFromReg reading a virtual register that is used // for the function live-in value of register Reg) SDValue Value = OutVals[I]; + if (Value->getOpcode() == ISD::AssertZext) + Value = Value.getOperand(0); if (Value->getOpcode() != ISD::CopyFromReg) return false; Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); @@ -121,7 +122,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); Alignment = Call->getParamStackAlign(ArgIdx); IndirectType = nullptr; - assert(IsByVal + IsPreallocated + IsInAlloca <= 1 && + assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 && "multiple ABI attributes?"); if (IsByVal) { IndirectType = Call->getParamByValType(ArgIdx); @@ -132,6 +133,8 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IndirectType = Call->getParamPreallocatedType(ArgIdx); if (IsInAlloca) IndirectType = Call->getParamInAllocaType(ArgIdx); + if (IsSRet) + IndirectType = Call->getParamStructRetType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -193,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, bool TargetLowering::findOptimalMemOpLowering( std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const { - if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) + if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() && + Op.getSrcAlign() < Op.getDstAlign()) return false; EVT VT = getOptimalMemOpType(Op, FuncAttributes); @@ -905,6 +909,132 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( Depth); } +// Attempt to 
form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). +// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). +static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI, + const APInt &DemandedBits, + const APInt &DemandedElts, + unsigned Depth) { + assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) && + "SRL or SRA node is required here!"); + // Is the right shift using an immediate value of 1? + ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (!N1C || !N1C->isOne()) + return SDValue(); + + // We are looking for an avgfloor + // add(ext, ext) + // or one of these as an avgceil + // add(add(ext, ext), 1) + // add(add(ext, 1), ext) + // add(ext, add(ext, 1)) + SDValue Add = Op.getOperand(0); + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue ExtOpA = Add.getOperand(0); + SDValue ExtOpB = Add.getOperand(1); + auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) { + ConstantSDNode *ConstOp; + if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op2; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op2; + return true; + } + return false; + }; + bool IsCeil = + (ExtOpA.getOpcode() == ISD::ADD && + MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) || + (ExtOpB.getOpcode() == ISD::ADD && + MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA)); + + // If the shift is signed (sra): + // - Needs >= 2 sign bits for both operands. + // - Needs >= 2 zero bits. + // If the shift is unsigned (srl): + // - Needs >= 1 zero bit for both operands. + // - Needs 1 demanded bit zero and >= 2 sign bits. + unsigned ShiftOpc = Op.getOpcode(); + bool IsSigned = false; + unsigned KnownBits; + unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth); + unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth); + unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1; + unsigned NumZeroA = + DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZeroB = + DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZero = std::min(NumZeroA, NumZeroB); + + switch (ShiftOpc) { + default: + llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG"); + case ISD::SRA: { + if (NumZero >= 2 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + case ISD::SRL: { + if (NumZero >= 1 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1 && DemandedBits.isSignBitClear()) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + } + + unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU) + : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU); + + // Find the smallest power-of-2 type that is legal for this vector size and + // operation, given the original type size and the number of known sign/zero + // bits.
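The matching above rests on a pure arithmetic fact: once the operands carry enough redundant sign/zero bits, the widened add-and-shift computes exactly what the narrow averaging nodes compute. A standalone C++ sketch, not part of the patch (avgflooru8/avgceilu8 are illustrative stand-ins for the AVGFLOORU/AVGCEILU node semantics), checking the unsigned identity exhaustively at 8 bits:

#include <cassert>
#include <cstdint>

// Narrow 8-bit averages computed without widening -- the documented
// semantics of the AVGFLOORU / AVGCEILU nodes on an i8 element.
static uint8_t avgflooru8(uint8_t a, uint8_t b) {
  return (a & b) + ((a ^ b) >> 1);
}
static uint8_t avgceilu8(uint8_t a, uint8_t b) {
  return (a | b) - ((a ^ b) >> 1);
}

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b) {
      // srl(add(zext(a), zext(b)), 1) --> avgflooru(a, b)
      assert(((a + b) >> 1) == avgflooru8(a, b));
      // srl(add(add(zext(a), zext(b)), 1), 1) --> avgceilu(a, b)
      assert(((a + b + 1) >> 1) == avgceilu8(a, b));
    }
  return 0;
}

The type-narrowing computation that the comment above describes follows in the patch: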
+ EVT VT = Op.getValueType(); + unsigned MinWidth = + std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth)); + if (VT.isVector()) + NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount()); + if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) + return SDValue(); + + SDLoc DL(Op); + SDValue ResultAVG = + DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), + DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); + return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, + ResultAVG); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -989,7 +1119,7 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits SrcKnown; SDValue Src = Op.getOperand(0); unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); - APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); + APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth); if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) return true; @@ -1105,7 +1235,7 @@ bool TargetLowering::SimplifyDemandedBits( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, Depth + 1)) @@ -1409,6 +1539,19 @@ bool TargetLowering::SimplifyDemandedBits( // Only known if known in both the LHS and RHS. Known = KnownBits::commonBits(Known, Known2); break; + case ISD::VSELECT: + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); + break; case ISD::SELECT_CC: if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, Depth + 1)) @@ -1542,6 +1685,16 @@ bool TargetLowering::SimplifyDemandedBits( // low bits known zero. Known.Zero.setLowBits(ShAmt); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // Try shrinking the operation as long as the shift amount will still be // in range. if ((ShAmt < DemandedBits.getActiveBits()) && @@ -1567,6 +1720,11 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); + // Try to match AVG patterns. 
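A quick standalone illustration, with hypothetical numbers rather than LLVM code, of the max/PowerOf2Ceil width computation just performed:

#include <algorithm>
#include <cassert>

// With KnownBits redundant sign/zero bits in a 16-bit element, the
// average needs MinWidth = max(16 - KnownBits, 8) bits, rounded up to
// a power of two.
static unsigned powerOf2Ceil(unsigned x) {
  unsigned p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

int main() {
  // i16 elements holding zero-extended i8 data (8 redundant bits):
  // the AVG can be formed at i8.
  assert(powerOf2Ceil(std::max(16u - 8u, 8u)) == 8);
  // Only 3 redundant bits: 13 rounds up to 16, so nothing is gained.
  assert(powerOf2Ceil(std::max(16u - 3u, 8u)) == 16);
  return 0;
}

With the narrow type chosen, the shift cases below feed candidate nodes into the combine: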
+ if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1633,6 +1791,11 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + // Try to match AVG patterns. + if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1727,6 +1890,22 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); Known.One |= Known2.One; Known.Zero |= Known2.Zero; + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() || + !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0; + DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0, + DemandedOp1, Op2); + return TLO.CombineTo(Op, NewOp); + } + } } // For pow-2 bitwidths we only demand the bottom modulo amt bits. @@ -1899,7 +2078,8 @@ bool TargetLowering::SimplifyDemandedBits( // bit is demanded. InputDemandedBits.setBit(ExVTBits - 1); - if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1965,7 +2145,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2002,7 +2182,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -2046,7 +2226,7 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2265,9 +2445,27 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::MUL: - // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1] - if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); + if (DemandedBits.isPowerOf2()) { + // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. + // If we demand exactly one bit N and we have "X * (C' << N)" where C' is + // odd (has LSB set), then the left-shifted low bit of X is the answer. 
+ unsigned CTZ = DemandedBits.countTrailingZeros(); + ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (C && C->getAPIntValue().countTrailingZeros() == CTZ) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC); + return TLO.CombineTo(Op, Shl); + } + } + // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: + // X * X is odd iff X is odd. + // 'Quadratic Reciprocity': X * X -> 0 for bit[1] + if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) { + SDValue One = TLO.DAG.getConstant(1, dl, VT); + SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One); + return TLO.CombineTo(Op, And1); + } LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { @@ -2330,6 +2528,49 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, NewOp); } + // Match a multiply with a disguised negated-power-of-2 and convert to + // an equivalent shift-left amount. + // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned { + if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse()) + return 0; + + // Don't touch opaque constants. Also, ignore zero and power-of-2 + // multiplies. Those will get folded later. + ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1)); + if (MulC && !MulC->isOpaque() && !MulC->isZero() && + !MulC->getAPIntValue().isPowerOf2()) { + APInt UnmaskedC = MulC->getAPIntValue() | HighMask; + if (UnmaskedC.isNegatedPowerOf2()) + return (-UnmaskedC).logBase2(); + } + return 0; + }; + + auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC); + SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl); + return TLO.CombineTo(Op, Res); + }; + + if (isOperationLegalOrCustom(ISD::SHL, VT)) { + if (Op.getOpcode() == ISD::ADD) { + // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op0)) + return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt); + // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt); + } + if (Op.getOpcode() == ISD::SUB) { + // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt); + } + } + LLVM_FALLTHROUGH; } default: @@ -2347,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we know the value of all of the demanded bits, return this as a // constant. - if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { + if (!isTargetCanonicalConstantNode(Op) && + DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved.
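All three multiply rewrites in this case are modular-arithmetic identities. A standalone C++ spot-check, not LLVM code and with arbitrary constants, of the single-demanded-bit fold, the squared-value fold, and the negated-power-of-2 fold:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t tests[] = {0, 1, 2, 3, 7, 100, 0x12345678, 0xFFFFFFFF};
  for (uint32_t x : tests) {
    // Demanding only bit 3 of X * C where C = 5 << 3 (odd C', ctz == 3):
    // that bit equals bit 3 of X << 3.
    const uint32_t C = 5u << 3;
    assert(((x * C) & (1u << 3)) == ((x << 3) & (1u << 3)));

    // X * X is odd iff X is odd, and bit 1 of X * X is always clear, so
    // when only the low two bits are demanded, X * X == (X & 1).
    assert(((x * x) & 3u) == (x & 1u));

    // MulC == -8 (0xFFFFFFF8) is a disguised negated power of two:
    // (X * MulC) + Op1 == Op1 - (X << 3), all mod 2^32.
    const uint32_t Op1 = 0xDEADBEEF;
    assert(x * 0xFFFFFFF8u + Op1 == Op1 - (x << 3));
  }
  return 0;
}

The opaque-constant guard continues below: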
const SDNode *N = Op.getNode(); for (SDNode *Op : @@ -2370,13 +2612,12 @@ bool TargetLowering::SimplifyDemandedBits( bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, - APInt &KnownUndef, - APInt &KnownZero, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); + APInt KnownUndef, KnownZero; bool Simplified = SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); if (Simplified) { @@ -2447,6 +2688,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef = KnownZero = APInt::getZero(NumElts); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO)) + return false; + // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) return false; @@ -2565,6 +2810,21 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, TLO, Depth + 1)) return true; + + // The bitcast has split each wide element into a number of + // narrow subelements. We have just computed the Known bits + // for wide elements. See if element splitting results in + // some subelements being zero. Only for demanded elements! + for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) { + if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits) + .isAllOnes()) + continue; + for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) { + unsigned Elt = Scale * SrcElt + SubElt; + if (DemandedElts[Elt]) + KnownZero.setBit(Elt); + } + } } // If the src element is zero/undef then all the output elements will be - @@ -2646,6 +2906,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.insertBits(SubUndef, i * NumSubElts); KnownZero.insertBits(SubZero, i * NumSubElts); } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnes()) { + bool FoundNewSub = false; + SmallVector<SDValue, 2> DemandedSubOps; + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts( + SubOp, SubElts, TLO.DAG, Depth + 1); + DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp); + FoundNewSub = NewSubOp ? 
true : FoundNewSub; + } + if (FoundNewSub) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::INSERT_SUBVECTOR: { @@ -2699,7 +2978,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, @@ -2858,7 +3137,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt SrcUndef, SrcZero; SDValue Src = Op.getOperand(0); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -3618,6 +3897,115 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return SDValue(); } +static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !(C1->isZero() || C1->isAllOnes())) + return SDValue(); + + auto getRotateSource = [](SDValue X) { + if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR) + return X.getOperand(0); + return SDValue(); + }; + + // Peek through a rotated value compared against 0 or -1: + // (rot X, Y) == 0/-1 --> X == 0/-1 + // (rot X, Y) != 0/-1 --> X != 0/-1 + if (SDValue R = getRotateSource(N0)) + return DAG.getSetCC(dl, VT, R, N1, Cond); + + // Peek through an 'or' of a rotated value compared against 0: + // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 + // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 + // + // TODO: Add the 'and' with -1 sibling. + // TODO: Recurse through a series of 'or' ops to find the rotate. + EVT OpVT = N0.getValueType(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) { + if (SDValue R = getRotateSource(N0.getOperand(0))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (SDValue R = getRotateSource(N0.getOperand(1))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + } + + return SDValue(); +} + +static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // If we are testing for all-bits-clear, we might be able to do that with + // less shifting since bit-order does not matter. + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !C1->isZero()) + return SDValue(); + + if (!N0.hasOneUse() || + (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR)) + return SDValue(); + + unsigned BitWidth = N0.getScalarValueSizeInBits(); + auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2)); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + return SDValue(); + + // Canonicalize fshr as fshl to reduce pattern-matching. 
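foldSetCCWithRotate above rests on the fact that a rotate merely permutes bits, so the all-zeros and all-ones patterns are invariant under it, even through an 'or'. A standalone exhaustive check at 8 bits, not LLVM code:

#include <cassert>
#include <cstdint>

static uint8_t rotl8(uint8_t x, unsigned r) {
  r &= 7; // avoid an undefined shift by 8
  return (uint8_t)((x << r) | (x >> ((8 - r) & 7)));
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned r = 0; r < 8; ++r) {
      // (rot X, Y) ==/!= 0/-1 --> X ==/!= 0/-1
      assert((rotl8((uint8_t)x, r) == 0) == (x == 0));
      assert((rotl8((uint8_t)x, r) == 0xFF) == (x == 0xFF));
      // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
      for (unsigned z = 0; z < 256; ++z)
        assert(((rotl8((uint8_t)x, r) | z) == 0) == ((x | z) == 0));
    }
  return 0;
}

The funnel-shift canonicalization mentioned above continues: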
+ unsigned ShAmt = ShAmtC->getZExtValue(); + if (N0.getOpcode() == ISD::FSHR) + ShAmt = BitWidth - ShAmt; + + // Match an 'or' with a specific operand 'Other' in either commuted variant. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue Other) { + if (Or.getOpcode() != ISD::OR || !Or.hasOneUse()) + return false; + if (Or.getOperand(0) == Other) { + X = Or.getOperand(0); + Y = Or.getOperand(1); + return true; + } + if (Or.getOperand(1) == Other) { + X = Or.getOperand(1); + Y = Or.getOperand(0); + return true; + } + return false; + }; + + EVT OpVT = N0.getValueType(); + EVT ShAmtVT = N0.getOperand(2).getValueType(); + SDValue F0 = N0.getOperand(0); + SDValue F1 = N0.getOperand(1); + if (matchOr(F0, F1)) { + // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (matchOr(F1, F0)) { + // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3632,13 +4020,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) return Fold; + bool N0ConstOrSplat = + isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + bool N1ConstOrSplat = + isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. // FIXME: We can't yet fold constant scalable vector splats, so avoid an // infinite loop here when we encounter one. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isConstOrConstSplat(N0) && - (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && + if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -3647,13 +4039,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // -- but in reverse order -- then try to commute the operands of this setcc // to match. A matching pair of setcc (cmp) and sub may be combined into 1 // instruction on some targets. 
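Both funnel-shift facts used by foldSetCCWithFunnelShift, the fshr-to-fshl canonicalization and the all-bits-clear rewrite, can likewise be checked exhaustively at 8 bits. A standalone sketch, not LLVM code:

#include <cassert>
#include <cstdint>

static uint8_t fshl8(uint8_t a, uint8_t b, unsigned c) { // c in [1, 7]
  return (uint8_t)((a << c) | (b >> (8 - c)));
}
static uint8_t fshr8(uint8_t a, uint8_t b, unsigned c) { // c in [1, 7]
  return (uint8_t)((a << (8 - c)) | (b >> c));
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned c = 1; c < 8; ++c) {
        // fshr(A, B, C) == fshl(A, B, BW - C)
        assert(fshr8((uint8_t)x, (uint8_t)y, c) ==
               fshl8((uint8_t)x, (uint8_t)y, 8 - c));
        // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
        assert((fshl8((uint8_t)(x | y), (uint8_t)x, c) == 0) ==
               ((uint8_t)((y << c) | x) == 0));
      }
  return 0;
}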
- if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && + if (!N0ConstOrSplat && !N1ConstOrSplat && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) + return V; + + if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG)) + return V; + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); @@ -4399,37 +4797,30 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 - if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(RHSC->getAPIntValue()- - LHSR->getAPIntValue(), - dl, N0.getValueType()), Cond); - } - - // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. - if (N0.getOpcode() == ISD::XOR) - // If we know that all of the inverted bits are zero, don't bother - // performing the inversion. - if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) - return - DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(LHSR->getAPIntValue() ^ - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(), + dl, N0.getValueType()), + Cond); + + // Turn (X^C1) == C2 --> X == C1^C2 + if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); } // Turn (C1-X) == C2 --> X == C1-C2 - if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { - if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { - return - DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(SUBC->getAPIntValue() - - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); - } - } + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); // Could RHSC fold directly into a compare? if (RHSC->getValueType(0).getSizeInBits() <= 64) @@ -4582,13 +4973,14 @@ TargetLowering::getConstraintType(StringRef Constraint) const { case 'o': // offsetable case 'V': // not offsetable return C_Memory; + case 'p': // Address. + return C_Address; case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant return C_Immediate; case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant - case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. 
case 'J': @@ -4826,8 +5218,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, if (OpInfo.CallOperandVal) { llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); if (OpInfo.isIndirect) { - OpTy = Call.getAttributes().getParamElementType(ArgNo); - assert(OpTy && "Indirect opernad must have elementtype attribute"); + OpTy = Call.getParamElementType(ArgNo); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -4962,6 +5354,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_RegisterClass: return 2; case TargetLowering::C_Memory: + case TargetLowering::C_Address: return 3; } llvm_unreachable("Invalid constraint type"); @@ -5232,6 +5625,17 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } +SDValue +TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SREM as SREM + return SDValue(); +} + /// Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -7016,6 +7420,30 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return true; } +SDValue +TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node, + SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM || + Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) && + "Wrong opcode"); + + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + + return SDValue(); +} + SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); @@ -7058,29 +7486,234 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } - // If none of the above worked, but there are no NaNs, then expand to - // a compare/select sequence. This is required for correctness since - // InstCombine might have canonicalized a fcmp+select sequence to a - // FMINNUM/FMAXNUM node. If we were to fall through to the default - // expansion to libcall, we might introduce a link-time dependency - // on libm into a file that originally did not have one. - if (Node->getFlags().hasNoNaNs()) { - ISD::CondCode Pred = - Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); - SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); - // Copy FMF flags, but always set the no-signed-zeros flag - // as this is implied by the FMINNUM/FMAXNUM semantics. 
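In scalar terms, the new createSelectForFMINNUM_FMAXNUM helper emits the moral equivalent of the sketch below (standalone C++, not the patch itself). The last assert hints at why the no-signed-zeros flag is forced: an ordered less-than cannot distinguish -0.0 from +0.0, so the select may legitimately return either zero.

#include <cassert>

// Under the no-NaNs flag, fminnum(a, b) lowers to
// select(setcc(a, b, SETLT), a, b).
static double fminnum_nonans(double a, double b) {
  return a < b ? a : b;
}

int main() {
  assert(fminnum_nonans(1.0, 2.5) == 1.0);
  assert(fminnum_nonans(-3.0, 2.5) == -3.0);
  // -0.0 < 0.0 is false, so the select returns the right operand here;
  // only nsz semantics make that acceptable. The check passes for
  // either zero, since -0.0 == 0.0.
  assert(fminnum_nonans(-0.0, 0.0) == 0.0);
  return 0;
}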
- SDNodeFlags Flags = Node->getFlags(); - Flags.setNoSignedZeros(true); - SelCC->setFlags(Flags); + if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG)) return SelCC; - } return SDValue(); } +SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, + unsigned Test, SDNodeFlags Flags, + const SDLoc &DL, + SelectionDAG &DAG) const { + EVT OperandVT = Op.getValueType(); + assert(OperandVT.isFloatingPoint()); + + // Degenerated cases. + if (Test == 0) + return DAG.getBoolConstant(false, DL, ResultVT, OperandVT); + if ((Test & fcAllFlags) == fcAllFlags) + return DAG.getBoolConstant(true, DL, ResultVT, OperandVT); + + // PPC double double is a pair of doubles, of which the higher part determines + // the value class. + if (OperandVT == MVT::ppcf128) { + Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op, + DAG.getConstant(1, DL, MVT::i32)); + OperandVT = MVT::f64; + } + + // Some checks may be represented as inversion of simpler check, for example + // "inf|normal|subnormal|zero" => !"nan". + bool IsInverted = false; + if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) { + IsInverted = true; + Test = InvertedCheck; + } + + // Floating-point type properties. + EVT ScalarFloatVT = OperandVT.getScalarType(); + const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext()); + const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics(); + bool IsF80 = (ScalarFloatVT == MVT::f80); + + // Some checks can be implemented using float comparisons, if floating point + // exceptions are ignored. + if (Flags.hasNoFPExcept() && + isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) { + if (Test == fcZero) + return DAG.getSetCC(DL, ResultVT, Op, + DAG.getConstantFP(0.0, DL, OperandVT), + IsInverted ? ISD::SETUNE : ISD::SETOEQ); + if (Test == fcNan) + return DAG.getSetCC(DL, ResultVT, Op, Op, + IsInverted ? ISD::SETO : ISD::SETUO); + } + + // In the general case use integer operations. + unsigned BitSize = OperandVT.getScalarSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize); + if (OperandVT.isVector()) + IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT, + OperandVT.getVectorElementCount()); + SDValue OpAsInt = DAG.getBitcast(IntVT, Op); + + // Various masks. + APInt SignBit = APInt::getSignMask(BitSize); + APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign. + APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit. + const unsigned ExplicitIntBitInF80 = 63; + APInt ExpMask = Inf; + if (IsF80) + ExpMask.clearBit(ExplicitIntBitInF80); + APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; + APInt QNaNBitMask = + APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); + APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits()); + + SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT); + SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT); + SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT); + SDValue ZeroV = DAG.getConstant(0, DL, IntVT); + SDValue InfV = DAG.getConstant(Inf, DL, IntVT); + SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT); + + SDValue Res; + const auto appendResult = [&](SDValue PartialRes) { + if (PartialRes) { + if (Res) + Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes); + else + Res = PartialRes; + } + }; + + SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set. 
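The inverted-check trick works because every value belongs to exactly one floating-point class, so testing a mask is the same as testing its complement and negating. A toy standalone model; the fc* values below are made up, and the real mask also distinguishes positive and negative variants of each class:

#include <cassert>
#include <cmath>

enum : unsigned {
  fcNan = 1, fcInf = 2, fcNormal = 4, fcSubnormal = 8, fcZero = 16,
  fcAllFlags = 31
};

// Stand-in returning the unique class bit of x.
static unsigned classOf(double x) {
  switch (std::fpclassify(x)) {
  case FP_NAN:       return fcNan;
  case FP_INFINITE:  return fcInf;
  case FP_NORMAL:    return fcNormal;
  case FP_SUBNORMAL: return fcSubnormal;
  default:           return fcZero; // FP_ZERO
  }
}

int main() {
  const double vals[] = {0.0, -1.5, 5e-324, INFINITY, NAN};
  for (double x : vals)
    for (unsigned Test = 0; Test <= fcAllFlags; ++Test) {
      bool Direct = (classOf(x) & Test) != 0;
      bool ViaInverse = !((classOf(x) & (fcAllFlags & ~Test)) != 0);
      assert(Direct == ViaInverse);
    }
  return 0;
}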
+ const auto getIntBitIsSet = [&]() -> SDValue { + if (!IntBitIsSetV) { + APInt IntBitMask(BitSize, 0); + IntBitMask.setBit(ExplicitIntBitInF80); + SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT); + SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV); + IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE); + } + return IntBitIsSetV; + }; + + // Split the value into sign bit and absolute value. + SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV); + SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt, + DAG.getConstant(0.0, DL, IntVT), ISD::SETLT); + + // Tests that involve more than one class should be processed first. + SDValue PartialRes; + + if (IsF80) + ; // Detect finite numbers of f80 by checking individual classes because + // they have different settings of the explicit integer bit. + else if ((Test & fcFinite) == fcFinite) { + // finite(V) ==> abs(V) < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + Test &= ~fcFinite; + } else if ((Test & fcFinite) == fcPosFinite) { + // finite(V) && V > 0 ==> V < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT); + Test &= ~fcPosFinite; + } else if ((Test & fcFinite) == fcNegFinite) { + // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1 + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + Test &= ~fcNegFinite; + } + appendResult(PartialRes); + + // Check for individual classes. + + if (unsigned PartialCheck = Test & fcZero) { + if (PartialCheck == fcPosZero) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ); + else if (PartialCheck == fcZero) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ); + else // ISD::fcNegZero + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcInf) { + if (PartialCheck == fcPosInf) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ); + else if (PartialCheck == fcInf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ); + else { // ISD::fcNegInf + APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt(); + SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNan) { + APInt InfWithQnanBit = Inf | QNaNBitMask; + SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT); + if (PartialCheck == fcNan) { + // isnan(V) ==> abs(V) > int(inf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + if (IsF80) { + // Recognize unsupported values as NaNs for compatibility with glibc. + // In them (exp(V)==0) == int_bit. 
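The class tests that follow are all unsigned integer comparisons against these masks. A standalone check for IEEE binary32, not LLVM code; the expansion itself is width-generic, and the names mirror the masks above:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bits(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof u);
  return u;
}

int main() {
  const uint32_t ValueMask = 0x7FFFFFFF; // all bits but sign
  const uint32_t Inf = 0x7F800000;       // exponent all ones
  const uint32_t Mantissa = 0x007FFFFF;  // all mantissa bits set

  const float vals[] = {0.0f, -0.0f, 1.0f, -2.5f, 1e-45f, -1e-40f,
                        INFINITY, -INFINITY, NAN};
  for (float f : vals) {
    uint32_t Abs = bits(f) & ValueMask;
    assert((Abs < Inf) == (bool)std::isfinite(f)); // finite: abs < exp_mask
    assert((Abs == Inf) == (bool)std::isinf(f));   // inf: abs == exp_mask
    assert((Abs > Inf) == (bool)std::isnan(f));    // nan: abs > exp_mask
    // subnormal: unsigned(abs - 1) < (all mantissa bits set)
    assert((Abs - 1 < Mantissa) == (std::fpclassify(f) == FP_SUBNORMAL));
  }
  return 0;
}

The f80 pseudo-value handling continues: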
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV); + SDValue ExpIsZero = + DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); + SDValue IsPseudo = + DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ); + PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo); + } + } else if (PartialCheck == fcQNan) { + // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit) + PartialRes = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE); + } else { // ISD::fcSNan + // issignaling(V) ==> abs(V) > unsigned(Inf) && + // abs(V) < (unsigned(Inf) | quiet_bit) + SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + SDValue IsNotQnan = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) + SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV; + SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT); + SDValue VMinusOneV = + DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT)); + PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT); + if (PartialCheck == fcNegSubnormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNormal) { + // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) + APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); + SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT); + SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV); + APInt ExpLimit = ExpMask - ExpLSB; + SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT); + if (PartialCheck == fcNegNormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + else if (PartialCheck == fcPosNormal) { + SDValue PosSignV = + DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV); + } + if (IsF80) + PartialRes = + DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet()); + appendResult(PartialRes); + } + + if (!Res) + return DAG.getConstant(IsInverted, DL, ResultVT); + if (IsInverted) + Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask); + return Res; +} + // Only expand vector types if we have the appropriate vector bit operations. static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { assert(VT.isVector() && "Expected vector type"); @@ -7116,8 +7749,6 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); - SDValue Mask01 = - DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, @@ -7137,13 +7768,28 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT))), Mask0F); - // v = (v * 0x01010101...) 
>> (Len - 8) - if (Len > 8) - Op = - DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, dl, ShVT)); - return Op; + if (Len <= 8) + return Op; + + // Avoid the multiply if we only have 2 bytes to add. + // TODO: Only doing this for scalars because vectors weren't as obviously + // improved. + if (Len == 16 && !VT.isVector()) { + // v = (v + (v >> 8)) & 0x00FF; + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(8, dl, ShVT))), + DAG.getConstant(0xFF, dl, VT)); + } + + // v = (v * 0x01010101...) >> (Len - 8) + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + return DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); } SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { @@ -7265,6 +7911,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -7272,6 +7919,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { + Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -7285,16 +7933,17 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + Op = DAG.getFreeze(Op); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - if (!IsNegative) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + + // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) + if (!IsNegative) + return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift); // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } @@ -8041,23 +8690,6 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, return SDValue(); } -// Convert redundant addressing modes (e.g. scaling is redundant -// when accessing bytes). -ISD::MemIndexType -TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, - SDValue Offsets) const { - bool IsScaledIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED); - bool IsSignedIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); - - // Scaling is unimportant for bytes, canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) - return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - - return IndexType; -} - SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); @@ -8473,8 +9105,20 @@ void TargetLowering::expandUADDSUBO( EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType( DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); - ISD::CondCode CC = IsAdd ? 
ISD::SETULT : ISD::SETUGT; - SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + SDValue SetCC; + if (IsAdd && isOneConstant(RHS)) { + // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces + // the live range of X. We assume comparing with 0 is cheap. + // The general case (X + C) < C is not necessarily beneficial. Although we + // reduce the live range of X, we may introduce the materialization of + // constant C. + SetCC = + DAG.getSetCC(dl, SetCCType, Result, + DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ); + } else { + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; + SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + } Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); } @@ -8773,11 +9417,11 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, // floating-point values. APInt MinInt, MaxInt; if (IsSigned) { - MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); - MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth); } else { - MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); - MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + MinInt = APInt::getMinValue(SatWidth).zext(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth); } // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as @@ -8931,13 +9575,16 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, - SDValue &CC, bool &NeedInvert, + SDValue &CC, SDValue Mask, + SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; + assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset"); + bool IsNonVP = !EVL; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); @@ -9044,17 +9691,34 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unordered operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
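That pattern is easiest to see on a concrete predicate. A standalone sketch, not LLVM code, of one possible decomposition: ordered-not-equal as the OR of two ordered compares (C++ < and > are themselves ordered, returning false on NaN):

#include <cassert>
#include <cmath>

// SETONE: ordered and not equal.
static bool setone(double a, double b) {
  return !std::isnan(a) && !std::isnan(b) && a != b;
}

int main() {
  const double vals[] = {-1.0, 0.0, 2.5, NAN};
  for (double a : vals)
    for (double b : vals)
      // (a SETOLT b) OR (a SETOGT b)
      assert(setone(a, b) == ((a < b) || (a > b)));
  return 0;
}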
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL); + } } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL); + } } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), SetCC2.getValue(1)); - LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + if (IsNonVP) + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + else { + // Transform the binary opcode to the VP equivalent. + assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode"); + Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND; + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL); + } RHS = SDValue(); CC = SDValue(); return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 43a54ce33bf0..5f9ade18f15c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -39,7 +39,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include <cassert> -#include <cstddef> #include <string> #include <utility> #include <vector> @@ -362,7 +361,7 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) { // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup", /*HandleExceptions=*/true, - DTU.hasValue() ? DTU.getPointer() : nullptr); + DTU ? DTU.getPointer() : nullptr); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 8211e3d6a9dd..1fcee02184a9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -413,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); - // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf. + // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf. 
Builder.CreateCall(BuiltinSetupDispatchFn, {}); // Store a pointer to the function context so that the back-end will know diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp index 7f9518e4c075..140a91ae342b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.cpp @@ -389,17 +389,34 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const { } #endif -LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM, - LiveInterval &LI) { - for (LiveInterval::SubRange &S : LI.subranges()) +/// Find a subrange corresponding to the exact lane mask @p LM in the live +/// interval @p LI. The interval @p LI is assumed to contain such a subrange. +/// This function is used to find corresponding subranges between the +/// original interval and the new intervals. +template <typename T> auto &getSubrangeImpl(LaneBitmask LM, T &LI) { + for (auto &S : LI.subranges()) if (S.LaneMask == LM) return S; llvm_unreachable("SubRange for this mask not found"); } -LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, - LiveInterval &LI) { - for (LiveInterval::SubRange &S : LI.subranges()) +LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + LiveInterval &LI) { + return getSubrangeImpl(LM, LI); +} + +const LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + const LiveInterval &LI) { + return getSubrangeImpl(LM, LI); +} + +/// Find a subrange corresponding to the lane mask @p LM, or a superset of it, +/// in the live interval @p LI. The interval @p LI is assumed to contain such +/// a subrange. This function is used to find corresponding subranges between +/// the original interval and the new intervals. +const LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, + const LiveInterval &LI) { + for (const LiveInterval::SubRange &S : LI.subranges()) if ((S.LaneMask & LM) == LM) return S; llvm_unreachable("SubRange for this mask not found"); @@ -566,10 +583,8 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg, return Def; } -VNInfo *SplitEditor::defFromParent(unsigned RegIdx, - VNInfo *ParentVNI, - SlotIndex UseIdx, - MachineBasicBlock &MBB, +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI, + SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { SlotIndex Def; LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); @@ -937,7 +952,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, void SplitEditor::computeRedundantBackCopies( DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) { LiveInterval *LI = &LIS.getInterval(Edit->get(0)); - LiveInterval *Parent = &Edit->getParent(); + const LiveInterval *Parent = &Edit->getParent(); SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums()); SmallPtrSet<VNInfo *, 8> DominatedVNIs; @@ -952,7 +967,7 @@ void SplitEditor::computeRedundantBackCopies( // For VNI aggregation of each ParentVNI, collect dominated, i.e., // redundant VNIs to BackCopies. for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { - VNInfo *ParentVNI = Parent->getValNumInfo(i); + const VNInfo *ParentVNI = Parent->getValNumInfo(i); if (!NotToHoistSet.count(ParentVNI->id)) continue; SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin(); @@ -990,7 +1005,7 @@ void SplitEditor::computeRedundantBackCopies( void SplitEditor::hoistCopies() { // Get the complement interval, always RegIdx 0. 
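A toy model of the two lookups the now-free SplitKit functions above perform on const and non-const intervals, with made-up masks and a plain struct instead of the LLVM types: exact lane-mask match versus a subrange whose mask is a superset of the requested lanes:

#include <cassert>
#include <cstdint>
#include <vector>

struct SubRange { uint32_t LaneMask; int id; };

static const SubRange *findExact(const std::vector<SubRange> &SRs,
                                 uint32_t LM) {
  for (const SubRange &S : SRs)
    if (S.LaneMask == LM)
      return &S;
  return nullptr;
}
static const SubRange *findSuperset(const std::vector<SubRange> &SRs,
                                    uint32_t LM) {
  for (const SubRange &S : SRs)
    if ((S.LaneMask & LM) == LM) // S covers every lane in LM
      return &S;
  return nullptr;
}

int main() {
  const std::vector<SubRange> SRs = {{0b0011u, 0}, {0b1100u, 1}};
  assert(findExact(SRs, 0b0011u)->id == 0);
  assert(findExact(SRs, 0b0001u) == nullptr);    // no exact subrange
  assert(findSuperset(SRs, 0b0001u)->id == 0);   // 0b0011 covers lane 0
  assert(findSuperset(SRs, 0b0110u) == nullptr); // straddles two subranges
  return 0;
}

Back in hoistCopies: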
LiveInterval *LI = &LIS.getInterval(Edit->get(0)); - LiveInterval *Parent = &Edit->getParent(); + const LiveInterval *Parent = &Edit->getParent(); // Track the nearest common dominator for all back-copies for each ParentVNI, // indexed by ParentVNI->id. @@ -1067,7 +1082,7 @@ void SplitEditor::hoistCopies() { if (!Dom.first || Dom.second.isValid()) continue; // This value needs a hoisted copy inserted at the end of Dom.first. - VNInfo *ParentVNI = Parent->getValNumInfo(i); + const VNInfo *ParentVNI = Parent->getValNumInfo(i); MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); // Get a less loopy dominator than Dom.first. Dom.first = findShallowDominator(Dom.first, DefMBB); @@ -1237,11 +1252,11 @@ void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveIntervalCalc &LIC, SlotIndex LastUse = End.getPrevSlot(); // The predecessor may not have a live-out value. That is OK, like an // undef PHI operand. - LiveInterval &PLI = Edit->getParent(); + const LiveInterval &PLI = Edit->getParent(); // Need the cast because the inputs to ?: would otherwise be deemed // "incompatible": SubRange vs LiveInterval. - LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) - : static_cast<LiveRange &>(PLI); + const LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) + : static_cast<const LiveRange &>(PLI); if (PSR.liveAt(LastUse)) LIC.extend(LR, End, /*PhysReg=*/0, Undefs); } @@ -1254,7 +1269,7 @@ void SplitEditor::extendPHIKillRanges() { // remove it. Otherwise, extend the live interval to reach the end indexes // of all predecessor blocks. - LiveInterval &ParentLI = Edit->getParent(); + const LiveInterval &ParentLI = Edit->getParent(); for (const VNInfo *V : ParentLI.valnos) { if (V->isUnused() || !V->isPHIDef()) continue; @@ -1270,7 +1285,7 @@ void SplitEditor::extendPHIKillRanges() { SmallVector<SlotIndex, 4> Undefs; LiveIntervalCalc SubLIC; - for (LiveInterval::SubRange &PS : ParentLI.subranges()) { + for (const LiveInterval::SubRange &PS : ParentLI.subranges()) { for (const VNInfo *V : PS.valnos) { if (V->isUnused() || !V->isPHIDef()) continue; @@ -1337,13 +1352,34 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { continue; // We may want to extend a live range for a partial redef, or for a use // tied to an early clobber. - Idx = Idx.getPrevSlot(); - if (!Edit->getParent().liveAt(Idx)) + if (!Edit->getParent().liveAt(Idx.getPrevSlot())) continue; - } else - Idx = Idx.getRegSlot(true); + } else { + assert(MO.isUse()); + bool IsEarlyClobber = false; + if (MO.isTied()) { + // We want to extend a live range into the `e` slot rather than the `r` + // slot if the tied def is an early clobber, because the `e` slot is + // already contained in the live range of the early-clobber tied-def + // operand. For example: + // 0 %0 = ... + // 16 early-clobber %0 = Op %0 (tied-def 0), ... + // 32 ... = Op %0 + // Before extending: + // %0 = [0r, 0d) [16e, 32d) + // Here we want to extend from 0d to 16e, not 16r: if we used 16r we + // would extend nothing, because it is already contained in [16e, 32d). + unsigned OpIdx = MI->getOperandNo(&MO); + unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx); + const MachineOperand &DefOp = MI->getOperand(DefOpIdx); + IsEarlyClobber = DefOp.isEarlyClobber(); + } + + Idx = Idx.getRegSlot(IsEarlyClobber); + } - SlotIndex Next = Idx.getNextSlot(); + SlotIndex Next = Idx; if (LI.hasSubRanges()) { // We have to delay extending subranges until we have seen all operands // defining the register.
This is because a <def,read-undef> operand @@ -1510,9 +1546,8 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { // Provide a reverse mapping from original indices to Edit ranges. if (LRMap) { - LRMap->clear(); - for (unsigned i = 0, e = Edit->size(); i != e; ++i) - LRMap->push_back(i); + auto Seq = llvm::seq<unsigned>(0, Edit->size()); + LRMap->assign(Seq.begin(), Seq.end()); } // Now check if any registers were separated into multiple components. diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h index 902546fe16d8..4400a797d38e 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm-project/llvm/lib/CodeGen/SplitKit.h @@ -22,19 +22,19 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalCalc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Compiler.h" #include <utility> namespace llvm { class AAResults; +class LiveInterval; +class LiveRange; class LiveIntervals; class LiveRangeEdit; class MachineBlockFrequencyInfo; @@ -346,19 +346,6 @@ private: return LICalc[SpillMode != SM_Partition && RegIdx != 0]; } - /// Find a subrange corresponding to the exact lane mask @p LM in the live - /// interval @p LI. The interval @p LI is assumed to contain such a subrange. - /// This function is used to find corresponding subranges between the - /// original interval and the new intervals. - LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, - LiveInterval &LI); - - /// Find a subrange corresponding to the lane mask @p LM, or a superset of it, - /// in the live interval @p LI. The interval @p LI is assumed to contain such - /// a subrange. This function is used to find corresponding subranges between - /// the original interval and the new intervals. - LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); - /// Add a segment to the interval LI for the value number VNI. If LI has /// subranges, corresponding segments will be added to them as well, but /// with newly created value numbers. If Original is true, dead def will @@ -390,10 +377,8 @@ private: /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. 
- VNInfo *defFromParent(unsigned RegIdx, - VNInfo *ParentVNI, - SlotIndex UseIdx, - MachineBasicBlock &MBB, + VNInfo *defFromParent(unsigned RegIdx, const VNInfo *ParentVNI, + SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I); /// removeBackCopies - Remove the copy instructions that defines the values diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp index 623d5da9831e..11c6bdc69956 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackColoring.cpp @@ -36,14 +36,12 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Use.h" @@ -1145,6 +1143,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n"); + (void) FixedMemOp; + (void) FixedDbg; + (void) FixedInstr; } void StackColoring::removeInvalidSlotRanges() { @@ -1319,6 +1320,11 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int FirstSlot = SortedSlots[I]; int SecondSlot = SortedSlots[J]; + + // Objects with different stack IDs cannot be merged. + if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot)) + continue; + LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; auto &FirstS = LiveStarts[FirstSlot]; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 3640296adbca..b83c56903133 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -17,9 +17,9 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp index 36e8f129ea15..6757d6ca4f88 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackMaps.cpp @@ -721,7 +721,7 @@ void StackMaps::serializeToStackMapSection() { // Create the section. MCSection *StackMapSection = OutContext.getObjectFileInfo()->getStackMapSection(); - OS.SwitchSection(StackMapSection); + OS.switchSection(StackMapSection); // Emit a dummy symbol to force section inclusion. 
OS.emitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); @@ -732,7 +732,7 @@ void StackMaps::serializeToStackMapSection() { emitFunctionFrameRecords(OS); emitConstantPoolEntries(OS); emitCallsiteEntries(OS); - OS.AddBlankLine(); + OS.addBlankLine(); // Clean up. CSInfos.clear(); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp index 6765fd274686..510a8e3e4ba2 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackProtector.cpp @@ -28,8 +28,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -169,7 +167,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI, // If this instruction accesses memory make sure it doesn't access beyond // the bounds of the allocated object. Optional<MemoryLocation> MemLoc = MemoryLocation::getOrNone(I); - if (MemLoc.hasValue() && MemLoc->Size.hasValue() && + if (MemLoc && MemLoc->Size.hasValue() && !TypeSize::isKnownGE(AllocSize, TypeSize::getFixed(MemLoc->Size.getValue()))) return true; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp index 17e6f51d0899..b8c750688914 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp index 20892a79d35f..bf3d2088e196 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplication.cpp @@ -14,14 +14,14 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/PassRegistry.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp index 68a7b80d6146..ba533a491b9c 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TailDuplicator.cpp @@ -19,17 +19,15 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include 
"llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -370,8 +368,8 @@ void TailDuplicator::processPHI( return; // Remove PredBB from the PHI node. - MI->RemoveOperand(SrcOpIdx + 1); - MI->RemoveOperand(SrcOpIdx); + MI->removeOperand(SrcOpIdx + 1); + MI->removeOperand(SrcOpIdx); if (MI->getNumOperands() == 1) MI->eraseFromParent(); } @@ -385,8 +383,9 @@ void TailDuplicator::duplicateInstruction( // Allow duplication of CFI instructions. if (MI->isCFIInstruction()) { BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), - TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( - MI->getOperand(0).getCFIIndex()); + TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MI->getOperand(0).getCFIIndex()) + .setMIFlags(MI->getFlags()); return; } MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); @@ -496,15 +495,15 @@ void TailDuplicator::updateSuccessorsPHIs( for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { MachineOperand &MO = MI.getOperand(i + 1); if (MO.getMBB() == FromBB) { - MI.RemoveOperand(i + 1); - MI.RemoveOperand(i); + MI.removeOperand(i + 1); + MI.removeOperand(i); } } } else Idx = 0; // If Idx is set, the operands at Idx and Idx+1 must be removed. - // We reuse the location to avoid expensive RemoveOperand calls. + // We reuse the location to avoid expensive removeOperand calls. DenseMap<Register, AvailableValsTy>::iterator LI = SSAUpdateVals.find(Reg); @@ -541,8 +540,8 @@ void TailDuplicator::updateSuccessorsPHIs( } } if (Idx != 0) { - MI.RemoveOperand(Idx + 1); - MI.RemoveOperand(Idx); + MI.removeOperand(Idx + 1); + MI.removeOperand(Idx); } } } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index fbf190a52585..9430e86fe44d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -10,17 +10,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetMachine.h" @@ -37,6 +37,11 @@ bool TargetFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const return false; } +bool TargetFrameLowering::enableCFIFixup(MachineFunction &MF) const { + return MF.needsFrameMoves() && + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); +} + /// Returns the displacement from the frame register to the stack /// frame of the specified index, along with the frame register used /// (in output arg FrameReg). 
This is the default implementation which diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp index 3f22cc4289f2..2a987ee3eedf 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -31,8 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" -#include <cctype> using namespace llvm; @@ -40,8 +39,7 @@ static cl::opt<bool> DisableHazardRecognizer( "disable-sched-hazard", cl::Hidden, cl::init(false), cl::desc("Disable hazard detection during preRA scheduling")); -TargetInstrInfo::~TargetInstrInfo() { -} +TargetInstrInfo::~TargetInstrInfo() = default; const TargetRegisterClass* TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, @@ -873,11 +871,13 @@ void TargetInstrInfo::reassociateOps( MachineInstrBuilder MIB1 = BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + .addReg(RegY, getKillRegState(KillY)) + .setMIFlags(Prev.getFlags()); MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); + .addReg(NewVR, getKillRegState(true)) + .setMIFlags(Root.getFlags()); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); @@ -1399,7 +1399,7 @@ std::string TargetInstrInfo::createMIROperandComment( return OS.str(); } -TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} +TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() = default; void TargetInstrInfo::mergeOutliningCandidateAttributes( Function &F, std::vector<outliner::Candidate> &Candidates) const { diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp index f69e50eaa0ca..f7f4a4e3db6a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -56,7 +56,6 @@ #include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> -#include <cstddef> #include <cstdint> #include <cstring> #include <iterator> @@ -202,7 +201,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl"); } - if (TT.isPS4CPU()) { + if (TT.isPS()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); } @@ -275,6 +274,11 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { return FPROUND_F128_F16; if (OpVT == MVT::ppcf128) return FPROUND_PPCF128_F16; + } else if (RetVT == MVT::bf16) { + if (OpVT == MVT::f32) + return FPROUND_F32_BF16; + if (OpVT == MVT::f64) + return FPROUND_F64_BF16; } else if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; @@ -740,6 +744,30 @@ void TargetLoweringBase::initActions() { std::fill(std::begin(TargetDAGCombineArray), std::end(TargetDAGCombineArray), 0); + // We're somewhat special casing MVT::i2 and MVT::i4. Ideally we want to + // remove this and targets should individually set these types if not legal. 
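The hunk that follows uses enum_seq to walk every generic opcode. ISD::NodeType does not declare itself iterable, hence the explicit force_iteration tag. A standalone sketch of the iteration pattern:

#include "llvm/ADT/Sequence.h"        // llvm::enum_seq
#include "llvm/CodeGen/ISDOpcodes.h"

// Count the generic SDNode opcodes in [DELETED_NODE, BUILTIN_OP_END); the
// tag is required because ISD::NodeType is not marked as an iterable enum.
static unsigned countGenericOpcodes() {
  unsigned N = 0;
  for (llvm::ISD::NodeType NT :
       llvm::enum_seq(llvm::ISD::DELETED_NODE, llvm::ISD::BUILTIN_OP_END,
                      llvm::force_iteration_on_noniterable_enum)) {
    (void)NT;
    ++N;
  }
  return N;
}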
+ for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END, + force_iteration_on_noniterable_enum)) { + for (MVT VT : {MVT::i2, MVT::i4}) + OpActions[(unsigned)VT.SimpleTy][NT] = Expand; + } + for (MVT AVT : MVT::all_valuetypes()) { + for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) { + setTruncStoreAction(AVT, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand); + } + } + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + for (MVT VT : {MVT::i2, MVT::i4}) { + setIndexedLoadAction(IM, VT, Expand); + setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); + } + } + for (MVT VT : MVT::fp_valuetypes()) { MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits()); if (IntVT.isValid()) { @@ -763,85 +791,63 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); // These operations default to expand. - setOperationAction(ISD::FGETSIGN, VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); - setOperationAction(ISD::FMINNUM, VT, Expand); - setOperationAction(ISD::FMAXNUM, VT, Expand); - setOperationAction(ISD::FMINNUM_IEEE, VT, Expand); - setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand); - setOperationAction(ISD::FMINIMUM, VT, Expand); - setOperationAction(ISD::FMAXIMUM, VT, Expand); - setOperationAction(ISD::FMAD, VT, Expand); - setOperationAction(ISD::SMIN, VT, Expand); - setOperationAction(ISD::SMAX, VT, Expand); - setOperationAction(ISD::UMIN, VT, Expand); - setOperationAction(ISD::UMAX, VT, Expand); - setOperationAction(ISD::ABS, VT, Expand); - setOperationAction(ISD::FSHL, VT, Expand); - setOperationAction(ISD::FSHR, VT, Expand); - setOperationAction(ISD::SADDSAT, VT, Expand); - setOperationAction(ISD::UADDSAT, VT, Expand); - setOperationAction(ISD::SSUBSAT, VT, Expand); - setOperationAction(ISD::USUBSAT, VT, Expand); - setOperationAction(ISD::SSHLSAT, VT, Expand); - setOperationAction(ISD::USHLSAT, VT, Expand); - setOperationAction(ISD::SMULFIX, VT, Expand); - setOperationAction(ISD::SMULFIXSAT, VT, Expand); - setOperationAction(ISD::UMULFIX, VT, Expand); - setOperationAction(ISD::UMULFIXSAT, VT, Expand); - setOperationAction(ISD::SDIVFIX, VT, Expand); - setOperationAction(ISD::SDIVFIXSAT, VT, Expand); - setOperationAction(ISD::UDIVFIX, VT, Expand); - setOperationAction(ISD::UDIVFIXSAT, VT, Expand); - setOperationAction(ISD::FP_TO_SINT_SAT, VT, Expand); - setOperationAction(ISD::FP_TO_UINT_SAT, VT, Expand); + setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS, + ISD::FMINNUM, ISD::FMAXNUM, + ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, + ISD::FMINIMUM, ISD::FMAXIMUM, + ISD::FMAD, ISD::SMIN, + ISD::SMAX, ISD::UMIN, + ISD::UMAX, ISD::ABS, + ISD::FSHL, ISD::FSHR, + ISD::SADDSAT, ISD::UADDSAT, + ISD::SSUBSAT, ISD::USUBSAT, + ISD::SSHLSAT, ISD::USHLSAT, + ISD::SMULFIX, ISD::SMULFIXSAT, + ISD::UMULFIX, ISD::UMULFIXSAT, + ISD::SDIVFIX, ISD::SDIVFIXSAT, + ISD::UDIVFIX, ISD::UDIVFIXSAT, + ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, + ISD::IS_FPCLASS}, + VT, Expand); // Overflow operations default to expand - setOperationAction(ISD::SADDO, VT, Expand); - setOperationAction(ISD::SSUBO, VT, Expand); - setOperationAction(ISD::UADDO, VT, Expand); - setOperationAction(ISD::USUBO, VT, Expand); - setOperationAction(ISD::SMULO, VT, Expand); - setOperationAction(ISD::UMULO, VT, Expand); + setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, 
ISD::USUBO, + ISD::SMULO, ISD::UMULO}, + VT, Expand); // ADDCARRY operations default to expand - setOperationAction(ISD::ADDCARRY, VT, Expand); - setOperationAction(ISD::SUBCARRY, VT, Expand); - setOperationAction(ISD::SETCCCARRY, VT, Expand); - setOperationAction(ISD::SADDO_CARRY, VT, Expand); - setOperationAction(ISD::SSUBO_CARRY, VT, Expand); + setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY, + ISD::SADDO_CARRY, ISD::SSUBO_CARRY}, + VT, Expand); // ADDC/ADDE/SUBC/SUBE default to expand. - setOperationAction(ISD::ADDC, VT, Expand); - setOperationAction(ISD::ADDE, VT, Expand); - setOperationAction(ISD::SUBC, VT, Expand); - setOperationAction(ISD::SUBE, VT, Expand); + setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT, + Expand); + + // Halving adds + setOperationAction( + {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT, + Expand); // Absolute difference - setOperationAction(ISD::ABDS, VT, Expand); - setOperationAction(ISD::ABDU, VT, Expand); + setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand); // These default to Expand so they will be expanded to CTLZ/CTTZ by default. - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, + Expand); - setOperationAction(ISD::BITREVERSE, VT, Expand); - setOperationAction(ISD::PARITY, VT, Expand); + setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction(ISD::FROUND, VT, Expand); - setOperationAction(ISD::FROUNDEVEN, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand); // These operations default to expand for vector types. - if (VT.isVector()) { - setOperationAction(ISD::FCOPYSIGN, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); - setOperationAction(ISD::SPLAT_VECTOR, VT, Expand); - } + if (VT.isVector()) + setOperationAction({ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, + ISD::ANY_EXTEND_VECTOR_INREG, + ISD::SIGN_EXTEND_VECTOR_INREG, + ISD::ZERO_EXTEND_VECTOR_INREG, ISD::SPLAT_VECTOR}, + VT, Expand); // Constrained floating-point operations default to expand. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ @@ -852,21 +858,13 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); // Vector reduction default to expand. 
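These hunks all lean on the setOperationAction overload that takes a list of opcodes (and a later one batches value types too), collapsing long runs of single calls. The call shape, sketched inside a hypothetical target's lowering class rather than any real one:

#include "llvm/CodeGen/TargetLowering.h"

// Illustrative subclass only; a real target derives its own lowering.
class MyTargetLowering : public llvm::TargetLowering {
public:
  explicit MyTargetLowering(const llvm::TargetMachine &TM)
      : TargetLowering(TM) {
    // One call marks a whole family of opcodes Expand for one type.
    setOperationAction({llvm::ISD::SMIN, llvm::ISD::SMAX, llvm::ISD::UMIN,
                        llvm::ISD::UMAX},
                       llvm::MVT::i64, Expand);
  }
};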
- setOperationAction(ISD::VECREDUCE_FADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand); - setOperationAction(ISD::VECREDUCE_ADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_MUL, VT, Expand); - setOperationAction(ISD::VECREDUCE_AND, VT, Expand); - setOperationAction(ISD::VECREDUCE_OR, VT, Expand); - setOperationAction(ISD::VECREDUCE_XOR, VT, Expand); - setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand); - setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand); - setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand); - setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand); + setOperationAction( + {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD, + ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, + ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, + ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX, + ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL}, + VT, Expand); // Named vector shuffles default to expand. setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); @@ -881,30 +879,16 @@ void TargetLoweringBase::initActions() { // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f80, Expand); - setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + setOperationAction(ISD::ConstantFP, + {MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, + Expand); // These library functions default to expand. - for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { - setOperationAction(ISD::FCBRT, VT, Expand); - setOperationAction(ISD::FLOG , VT, Expand); - setOperationAction(ISD::FLOG2, VT, Expand); - setOperationAction(ISD::FLOG10, VT, Expand); - setOperationAction(ISD::FEXP , VT, Expand); - setOperationAction(ISD::FEXP2, VT, Expand); - setOperationAction(ISD::FFLOOR, VT, Expand); - setOperationAction(ISD::FNEARBYINT, VT, Expand); - setOperationAction(ISD::FCEIL, VT, Expand); - setOperationAction(ISD::FRINT, VT, Expand); - setOperationAction(ISD::FTRUNC, VT, Expand); - setOperationAction(ISD::LROUND, VT, Expand); - setOperationAction(ISD::LLROUND, VT, Expand); - setOperationAction(ISD::LRINT, VT, Expand); - setOperationAction(ISD::LLRINT, VT, Expand); - } + setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, + ISD::FEXP2, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, + ISD::FRINT, ISD::FTRUNC, ISD::LROUND, ISD::LLROUND, + ISD::LRINT, ISD::LLRINT}, + {MVT::f32, MVT::f64, MVT::f128}, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -1394,6 +1378,16 @@ void TargetLoweringBase::computeRegisterProperties( } } + // Decide how to handle bf16. If the target does not have native bf16 support, + // promote it to f32, because there are no bf16 library calls (except for + // converting from f32 to bf16). 
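The hunk that follows implements the policy just described: when bf16 is not a legal type, its register count, register type, and transform target are copied from f32, and the type action is set to promote-float. On such a target, a probe like this sketch (helper name mine) should hold:

#include "llvm/CodeGen/TargetLowering.h"

// True on targets where bf16 falls back to the new promote-to-f32 default.
static bool bf16PromotesToF32(const llvm::TargetLowering &TLI,
                              llvm::LLVMContext &Ctx) {
  return !TLI.isTypeLegal(llvm::MVT::bf16) &&
         TLI.getTypeToTransformTo(Ctx, llvm::MVT::bf16) == llvm::MVT::f32;
}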
+ if (!isTypeLegal(MVT::bf16)) { + NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::bf16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::bf16, TypePromoteFloat); + } + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1647,6 +1641,11 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, (NumCases * 100 >= Range * MinDensity); } +MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, + EVT ConditionVT) const { + return getRegisterType(Context, ConditionVT); +} + /// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. @@ -2066,9 +2065,11 @@ static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { Name += IsSqrt ? "sqrt" : "div"; - // TODO: Handle "half" or other float types? + // TODO: Handle other float types? if (VT.getScalarType() == MVT::f64) { Name += "d"; + } else if (VT.getScalarType() == MVT::f16) { + Name += "h"; } else { assert(VT.getScalarType() == MVT::f32 && "Unexpected FP type for reciprocal estimate"); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index ce350034d073..f3d68bd9c92d 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -310,7 +310,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getELFSection(".linker-options", ELF::SHT_LLVM_LINKER_OPTIONS, ELF::SHF_EXCLUDE); - Streamer.SwitchSection(S); + Streamer.switchSection(S); for (const auto *Operand : LinkerOptions->operands()) { if (cast<MDNode>(Operand)->getNumOperands() != 2) @@ -326,7 +326,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1); - Streamer.SwitchSection(S); + Streamer.switchSection(S); for (const auto *Operand : DependentLibraries->operands()) { Streamer.emitBytes( @@ -350,7 +350,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, auto *S = C.getObjectFileInfo()->getPseudoProbeDescSection( TM->getFunctionSections() ? 
Name->getString() : StringRef()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitInt64(GUID->getZExtValue()); Streamer.emitInt64(Hash->getZExtValue()); Streamer.emitULEB128IntValue(Name->getString().size()); @@ -365,11 +365,11 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, GetObjCImageInfo(M, Version, Flags, Section); if (!Section.empty()) { auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.emitInt32(Version); Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } emitCGProfileMetadata(Streamer, M); @@ -399,7 +399,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue( MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(), ELF::SHT_PROGBITS, Flags, 0); unsigned Size = DL.getPointerSize(); - Streamer.SwitchSection(Sec); + Streamer.switchSection(Sec); Streamer.emitValueToAlignment(DL.getPointerABIAlignment(0).value()); Streamer.emitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); @@ -449,6 +449,9 @@ static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { Name == ".llvmbc" || Name == ".llvmcmd") return SectionKind::getMetadata(); + if (Name == ".llvm.offloading") + return SectionKind::getExclude(); + if (Name.empty() || Name[0] != '.') return K; // Default implementation based on some magic section names. @@ -507,9 +510,12 @@ static unsigned getELFSectionType(StringRef Name, SectionKind K) { static unsigned getELFSectionFlags(SectionKind K) { unsigned Flags = 0; - if (!K.isMetadata()) + if (!K.isMetadata() && !K.isExclude()) Flags |= ELF::SHF_ALLOC; + if (K.isExclude()) + Flags |= ELF::SHF_EXCLUDE; + if (K.isText()) Flags |= ELF::SHF_EXECINSTR; @@ -681,9 +687,10 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName, } if (Retain) { - if ((Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && - !TM.getTargetTriple().isOSSolaris()) + if (TM.getTargetTriple().isOSSolaris()) + Flags |= ELF::SHF_SUNW_NODISCARD; + else if (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) Flags |= ELF::SHF_GNU_RETAIN; return NextUniqueID++; } @@ -860,12 +867,15 @@ static MCSection *selectELFSectionForGlobal( EmitUniqueSection = true; Flags |= ELF::SHF_LINK_ORDER; } - if (Retain && - (Ctx.getAsmInfo()->useIntegratedAssembler() || - Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) && - !TM.getTargetTriple().isOSSolaris()) { - EmitUniqueSection = true; - Flags |= ELF::SHF_GNU_RETAIN; + if (Retain) { + if (TM.getTargetTriple().isOSSolaris()) { + EmitUniqueSection = true; + Flags |= ELF::SHF_SUNW_NODISCARD; + } else if (Ctx.getAsmInfo()->useIntegratedAssembler() || + Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) { + EmitUniqueSection = true; + Flags |= ELF::SHF_GNU_RETAIN; + } } MCSectionELF *Section = selectELFSectionForGlobal( @@ -1171,6 +1181,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } +MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag + // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here. 
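The fallback deprecated above exists because @llvm.global_dtors on MachO is now lowered to __cxa_atexit registrations instead of entries in the obsolete __mod_term_func section. What that lowering amounts to at the source level, roughly; the registration boilerplate here is illustrative:

#include <cstdio>

// Itanium C++ ABI runtime hook: run F(Arg) at process exit, tied to this DSO.
extern "C" int __cxa_atexit(void (*F)(void *), void *Arg, void *DSOHandle);
extern "C" void *__dso_handle;

static void moduleDtor(void *) { std::puts("global_dtors entry runs here"); }

// A static initializer registers the destructor, replacing what would have
// been a __mod_term_func entry with an ordinary atexit-style callback.
static int Registered = __cxa_atexit(moduleDtor, nullptr, &__dso_handle);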
+ if (TM->Options.LowerGlobalDtorsViaCxaAtExit) + report_fatal_error("@llvm.global_dtors should have been lowered already"); + return StaticDtorSection; +} + void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { // Emit the linker options if present. @@ -1207,12 +1226,12 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, // Get the section. MCSectionMachO *S = getContext().getMachOSection( Segment, Section, TAA, StubSize, SectionKind::getData()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); Streamer.emitInt32(VersionVal); Streamer.emitInt32(ImageInfoFlags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } static void checkMachOComdat(const GlobalValue *GV) { @@ -1520,6 +1539,9 @@ getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) { if (K.isMetadata()) Flags |= COFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isExclude()) + Flags |= + COFF::IMAGE_SCN_LNK_REMOVE | COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | @@ -1755,11 +1777,11 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - Streamer.SwitchSection(S); + Streamer.switchSection(S); Streamer.emitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); Streamer.emitInt32(Version); Streamer.emitInt32(Flags); - Streamer.AddBlankLine(); + Streamer.addBlankLine(); } emitCGProfileMetadata(Streamer, M); @@ -1772,7 +1794,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( // spec, this section is a space-separated string containing flags for // linker. MCSection *Sec = getDrectveSection(); - Streamer.SwitchSection(Sec); + Streamer.switchSection(Sec); for (const auto *Option : LinkerOptions->operands()) { for (const auto &Piece : cast<MDNode>(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. @@ -1791,7 +1813,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( getMangler()); OS.flush(); if (!Flags.empty()) { - Streamer.SwitchSection(getDrectveSection()); + Streamer.switchSection(getDrectveSection()); Streamer.emitBytes(Flags); } Flags.clear(); @@ -1817,7 +1839,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerDirectives( OS.flush(); if (!Flags.empty()) { - Streamer.SwitchSection(getDrectveSection()); + Streamer.switchSection(getDrectveSection()); Streamer.emitBytes(Flags); } Flags.clear(); @@ -2170,8 +2192,7 @@ MCSection *TargetLoweringObjectFileWasm::getStaticCtorSection( MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - llvm_unreachable("@llvm.global_dtors should have been lowered already"); - return nullptr; + report_fatal_error("@llvm.global_dtors should have been lowered already"); } //===----------------------------------------------------------------------===// @@ -2544,10 +2565,24 @@ MCSection *TargetLoweringObjectFileXCOFF::getSectionForTOCEntry( XCOFF::XTY_SD)); } +MCSection *TargetLoweringObjectFileXCOFF::getSectionForLSDA( + const Function &F, const MCSymbol &FnSym, const TargetMachine &TM) const { + auto *LSDA = cast<MCSectionXCOFF>(LSDASection); + if (TM.getFunctionSections()) { + // If option -ffunction-sections is on, append the function name to the + // name of the LSDA csect so that each function has its own LSDA csect. 
+ // This helps the linker to garbage-collect EH info of unused functions. + SmallString<128> NameStr = LSDA->getName(); + raw_svector_ostream(NameStr) << '.' << F.getName(); + LSDA = getContext().getXCOFFSection(NameStr, LSDA->getKind(), + LSDA->getCsectProp()); + } + return LSDA; +} //===----------------------------------------------------------------------===// // GOFF //===----------------------------------------------------------------------===// -TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() {} +TargetLoweringObjectFileGOFF::TargetLoweringObjectFileGOFF() = default; MCSection *TargetLoweringObjectFileGOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { @@ -2558,8 +2593,8 @@ MCSection *TargetLoweringObjectFileGOFF::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { auto *Symbol = TM.getSymbol(GO); if (Kind.isBSS()) - return getContext().getGOFFSection(Symbol->getName(), - SectionKind::getBSS()); + return getContext().getGOFFSection(Symbol->getName(), SectionKind::getBSS(), + nullptr, nullptr); return getContext().getObjectFileInfo()->getTextSection(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 0731cf9b28f4..af5d10103f78 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp index 05004fb935df..0bd229f4fc68 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePassRegistry.h" @@ -47,7 +48,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/SymbolRewriter.h" #include <cassert> #include <string> @@ -115,20 +115,18 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt<cl::boolOrDefault> VerifyMachineCode("verify-machineinstrs", cl::Hidden, - cl::desc("Verify generated machine code"), - cl::ZeroOrMore); -static cl::opt<cl::boolOrDefault> DebugifyAndStripAll( - "debugify-and-strip-all-safe", cl::Hidden, - cl::desc( - "Debugify MIR before and Strip debug after " - "each pass except those known to be unsafe when debug info is present"), - cl::ZeroOrMore); + cl::desc("Verify generated machine code")); +static cl::opt<cl::boolOrDefault> + DebugifyAndStripAll("debugify-and-strip-all-safe", cl::Hidden, + cl::desc("Debugify MIR before and Strip debug after " + "each pass except those known to be unsafe " + "when debug info is present")); static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll( "debugify-check-and-strip-all-safe", cl::Hidden, cl::desc( "Debugify MIR before, by 
checking and stripping the debug info after, " - "each pass except those known to be unsafe when debug info is present"), - cl::ZeroOrMore); + "each pass except those known to be unsafe when debug info is " + "present")); // Enable or disable the MachineOutliner. static cl::opt<RunOutliner> EnableMachineOutliner( "enable-machine-outliner", cl::desc("Enable the machine outliner"), @@ -139,6 +137,11 @@ static cl::opt<RunOutliner> EnableMachineOutliner( "Disable all outlining"), // Sentinel value for unspecified option. clEnumValN(RunOutliner::AlwaysOutline, "", ""))); +// Disable the pass to fix unwind information. Whether the pass is included in +// the pipeline is controlled via the target options; this option serves as +// a manual override. +static cl::opt<bool> DisableCFIFixup("disable-cfi-fixup", cl::Hidden, + cl::desc("Disable the CFI fixup pass")); // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -175,12 +178,12 @@ static cl::opt<bool> // Disable MIRProfileLoader before RegAlloc. This is for debugging and // tuning purposes. static cl::opt<bool> DisableRAFSProfileLoader( - "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden, + "disable-ra-fsprofile-loader", cl::init(false), cl::Hidden, cl::desc("Disable MIRProfileLoader before RegAlloc")); // Disable MIRProfileLoader before BlockPlacement. This is for debugging // and tuning purposes. static cl::opt<bool> DisableLayoutFSProfileLoader( - "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden, + "disable-layout-fsprofile-loader", cl::init(false), cl::Hidden, cl::desc("Disable MIRProfileLoader before BlockPlacement")); // Specify FSProfile file name. static cl::opt<std::string> @@ -256,6 +259,11 @@ static cl::opt<bool> DisableExpandReductions( "disable-expand-reductions", cl::init(false), cl::Hidden, cl::desc("Disable the expand reduction intrinsics pass from running")); +/// Disable the select optimization pass. +static cl::opt<bool> DisableSelectOptimize( + "disable-select-optimize", cl::init(true), cl::Hidden, + cl::desc("Disable the select-optimization pass from running")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -490,6 +498,7 @@ CGPassBuilderOption llvm::getCGPassBuilderOption() { SET_BOOLEAN_OPTION(DisableConstantHoisting) SET_BOOLEAN_OPTION(DisableCGP) SET_BOOLEAN_OPTION(DisablePartialLibcallInlining) + SET_BOOLEAN_OPTION(DisableSelectOptimize) SET_BOOLEAN_OPTION(PrintLSR) SET_BOOLEAN_OPTION(PrintISelInput) SET_BOOLEAN_OPTION(PrintGCInfo) @@ -736,21 +745,21 @@ void TargetPassConfig::addPass(Pass *P) { if (StopBefore == PassID && StopBeforeCount++ == StopBeforeInstanceNum) Stopped = true; if (Started && !Stopped) { - if (AddingMachinePasses) + if (AddingMachinePasses) { + // Construct banner message before PM->add() as that may delete the pass. + std::string Banner = + std::string("After ") + std::string(P->getPassName()); addMachinePrePasses(); - std::string Banner; - // Construct banner message before PM->add() as that may delete the pass. - if (AddingMachinePasses) - Banner = std::string("After ") + std::string(P->getPassName()); - PM->add(P); - if (AddingMachinePasses) + PM->add(P); addMachinePostPasses(Banner); + } else { + PM->add(P); + } // Add the passes after the pass P if there are any.
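That trailing comment refers to the InsertedPasses queue drained in the loop just below; targets fill the queue through TargetPassConfig::insertPass, which splices an extra pass in immediately after an anchor pass. A hypothetical use, with a made-up pass ID:

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

char MyExtraPassID = 0; // placeholder ID; a real target uses its own pass

// Queue MyExtraPass to run right after the register coalescer; the base
// addMachinePasses() walk performs the actual insertion via addPass().
struct MyPassConfig : llvm::TargetPassConfig {
  using TargetPassConfig::TargetPassConfig;
  void addMachinePasses() override {
    insertPass(&llvm::RegisterCoalescerID, &MyExtraPassID);
    TargetPassConfig::addMachinePasses();
  }
};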
- for (const auto &IP : Impl->InsertedPasses) { + for (const auto &IP : Impl->InsertedPasses) if (IP.TargetPassID == PassID) addPass(IP.getInsertedPass()); - } } else { delete P; } @@ -895,6 +904,12 @@ void TargetPassConfig::addIRPasses() { addPass(&ShadowStackGCLoweringID); addPass(createLowerConstantIntrinsicsPass()); + // For MachO, lower @llvm.global_dtors into @llvm.global_ctors with + // __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func. + if (TM->getTargetTriple().isOSBinFormatMachO() && + TM->Options.LowerGlobalDtorsViaCxaAtExit) + addPass(createLowerGlobalDtorsLegacyPass()); + // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); @@ -922,6 +937,13 @@ void TargetPassConfig::addIRPasses() { // Allow disabling it for testing purposes. if (!DisableExpandReductions) addPass(createExpandReductionsPass()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createTLSVariableHoistPass()); + + // Convert conditional moves to conditional jumps when profitable. + if (getOptLevel() != CodeGenOpt::None && !DisableSelectOptimize) + addPass(createSelectOptimizePass()); } /// Turn exception handling constructs into something the code generators can @@ -1261,12 +1283,19 @@ void TargetPassConfig::addMachinePasses() { // FIXME: In principle, BasicBlockSection::Labels and splitting can be used // together. Update this check once we have addressed any issues. if (TM->getBBSectionsType() != llvm::BasicBlockSection::None) { - addPass(llvm::createBasicBlockSectionsPass(TM->getBBSectionsFuncListBuf())); + if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { + addPass(llvm::createBasicBlockSectionsProfileReaderPass( + TM->getBBSectionsFuncListBuf())); + } + addPass(llvm::createBasicBlockSectionsPass()); } else if (TM->Options.EnableMachineFunctionSplitter || EnableMachineFunctionSplitter) { addPass(createMachineFunctionSplitterPass()); } + if (!DisableCFIFixup && TM->Options.EnableCFIFixup) + addPass(createCFIFixup()); + // Add passes that directly emit MI after all other MI passes.
addPreEmitPass2(); @@ -1376,6 +1405,11 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { return createTargetRegisterAllocator(Optimized); } +bool TargetPassConfig::isCustomizedRegAlloc() { + return RegAlloc != + (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator; +} + bool TargetPassConfig::addRegAssignAndRewriteFast() { if (RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&useDefaultRegisterAllocator && RegAlloc != (RegisterRegAlloc::FunctionPassCtor)&createFastRegisterAllocator) diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 6bcf79547056..ac346585b0f8 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -16,10 +16,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp index ce59452fd1b8..ac07c86cab85 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSchedule.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index e4520d8ccb1e..ba2c8dda7de5 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -45,10 +45,6 @@ bool TargetSubtargetInfo::enableRALocalReassignment( return true; } -bool TargetSubtargetInfo::enableAdvancedRASplitCost() const { - return false; -} - bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index dfd962be2882..c44fd9f97383 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -28,7 +28,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" @@ -50,7 +49,6 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -163,6 +161,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, 
unsigned &Dist); void eliminateRegSequence(MachineBasicBlock::iterator&); + bool processStatepoint(MachineInstr *MI, TiedOperandMap &TiedOperands); public: static char ID; // Pass identification, replacement for typeid @@ -1629,6 +1628,61 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } +// For every tied operand pair this function transforms statepoint from +// RegA = STATEPOINT ... RegB(tied-def N) +// to +// RegB = STATEPOINT ... RegB(tied-def N) +// and replaces all uses of RegA with RegB. +// No extra COPY instruction is necessary because tied use is killed at +// STATEPOINT. +bool TwoAddressInstructionPass::processStatepoint( + MachineInstr *MI, TiedOperandMap &TiedOperands) { + + bool NeedCopy = false; + for (auto &TO : TiedOperands) { + Register RegB = TO.first; + if (TO.second.size() != 1) { + NeedCopy = true; + continue; + } + + unsigned SrcIdx = TO.second[0].first; + unsigned DstIdx = TO.second[0].second; + + MachineOperand &DstMO = MI->getOperand(DstIdx); + Register RegA = DstMO.getReg(); + + assert(RegB == MI->getOperand(SrcIdx).getReg()); + + if (RegA == RegB) + continue; + + MRI->replaceRegWith(RegA, RegB); + + if (LIS) { + VNInfo::Allocator &A = LIS->getVNInfoAllocator(); + LiveInterval &LI = LIS->getInterval(RegB); + for (auto &S : LIS->getInterval(RegA)) { + VNInfo *VNI = LI.getNextValue(S.start, A); + LiveRange::Segment NewSeg(S.start, S.end, VNI); + LI.addSegment(NewSeg); + } + LIS->removeInterval(RegA); + } + + if (LV) { + if (MI->getOperand(SrcIdx).isKill()) + LV->removeVirtualRegisterKilled(RegB, *MI); + LiveVariables::VarInfo &SrcInfo = LV->getVarInfo(RegB); + LiveVariables::VarInfo &DstInfo = LV->getVarInfo(RegA); + SrcInfo.AliveBlocks |= DstInfo.AliveBlocks; + for (auto *KillMI : DstInfo.Kills) + LV->addVirtualRegisterKilled(RegB, *KillMI, false); + } + } + return !NeedCopy; +} + /// Reduce two-address instructions to two operands. bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; @@ -1722,6 +1776,14 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } } + if (mi->getOpcode() == TargetOpcode::STATEPOINT && + processStatepoint(&*mi, TiedOperands)) { + TiedOperands.clear(); + LLVM_DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + mi = nmi; + continue; + } + // Now iterate over the information collected above. 
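processStatepoint, shown above, rewrites RegA = STATEPOINT ... RegB(tied-def N) so that the def reuses RegB directly, then repoints every use of RegA. Because the tied use is killed at the statepoint itself, plain register replacement is enough and no COPY is inserted. The core move in isolation, as a sketch:

#include "llvm/CodeGen/MachineRegisterInfo.h"

// Fold a statepoint's tied def into its tied use: every later reader of
// OldDef is rewritten to read TiedUse instead. Safe only because the tied
// use is killed at the STATEPOINT, so the two never live simultaneously.
static void foldTiedStatepointDef(llvm::MachineRegisterInfo &MRI,
                                  llvm::Register OldDef,
                                  llvm::Register TiedUse) {
  MRI.replaceRegWith(OldDef, TiedUse);
}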
for (auto &TO : TiedOperands) { processTiedPairs(&*mi, TO.second, Dist); @@ -1733,11 +1795,11 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // From %reg = INSERT_SUBREG %reg, %subreg, subidx // To %reg:subidx = COPY %subreg unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); + mi->removeOperand(3); assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); mi->getOperand(0).setSubReg(SubIdx); mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); - mi->RemoveOperand(1); + mi->removeOperand(1); mi->setDesc(TII->get(TargetOpcode::COPY)); LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); @@ -1858,7 +1920,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { LLVM_DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j) - MI.RemoveOperand(j); + MI.removeOperand(j); } else { if (LIS) LIS->RemoveMachineInstrFromMaps(MI); diff --git a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp index 01ea171e5ea2..166a3c413f6a 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/TypePromotion.cpp @@ -24,15 +24,13 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -44,9 +42,9 @@ using namespace llvm; -static cl::opt<bool> -DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), - cl::desc("Disable type promotion pass")); +static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden, + cl::init(false), + cl::desc("Disable type promotion pass")); // The goal of this pass is to enable more efficient code generation for // operations on narrow types (i.e. 
types with < 32-bits) and this is a @@ -103,17 +101,16 @@ DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), namespace { class IRPromoter { LLVMContext &Ctx; - IntegerType *OrigTy = nullptr; unsigned PromotedWidth = 0; - SetVector<Value*> &Visited; - SetVector<Value*> &Sources; - SetVector<Instruction*> &Sinks; + SetVector<Value *> &Visited; + SetVector<Value *> &Sources; + SetVector<Instruction *> &Sinks; SmallPtrSetImpl<Instruction *> &SafeWrap; IntegerType *ExtTy = nullptr; - SmallPtrSet<Value*, 8> NewInsts; - SmallPtrSet<Instruction*, 4> InstsToRemove; - DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap; - SmallPtrSet<Value*, 8> Promoted; + SmallPtrSet<Value *, 8> NewInsts; + SmallPtrSet<Instruction *, 4> InstsToRemove; + DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap; + SmallPtrSet<Value *, 8> Promoted; void ReplaceAllUsersOfWith(Value *From, Value *To); void ExtendSources(); @@ -123,16 +120,13 @@ class IRPromoter { void Cleanup(); public: - IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, + IRPromoter(LLVMContext &C, unsigned Width, SetVector<Value *> &visited, SetVector<Value *> &sources, SetVector<Instruction *> &sinks, SmallPtrSetImpl<Instruction *> &wrap) - : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + : Ctx(C), PromotedWidth(Width), Visited(visited), Sources(sources), Sinks(sinks), SafeWrap(wrap) { ExtTy = IntegerType::get(Ctx, PromotedWidth); - assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() < - ExtTy->getPrimitiveSizeInBits().getFixedSize() && - "Original type not smaller than extended type"); } void Mutate(); @@ -142,8 +136,8 @@ class TypePromotion : public FunctionPass { unsigned TypeSize = 0; LLVMContext *Ctx = nullptr; unsigned RegisterBitWidth = 0; - SmallPtrSet<Value*, 16> AllVisited; - SmallPtrSet<Instruction*, 8> SafeToPromote; + SmallPtrSet<Value *, 16> AllVisited; + SmallPtrSet<Instruction *, 8> SafeToPromote; SmallPtrSet<Instruction *, 4> SafeWrap; // Does V have the same size result type as TypeSize. @@ -190,7 +184,7 @@ public: bool runOnFunction(Function &F) override; }; -} +} // namespace static bool GenerateSignBits(Instruction *I) { unsigned Opc = I->getOpcode(); @@ -245,7 +239,7 @@ bool TypePromotion::isSource(Value *V) { bool TypePromotion::isSink(Value *V) { // TODO The truncate also isn't actually necessary because we would already // proved that the data value is kept within the range of the original data - // type. + // type. We currently remove any truncs inserted for handling zext sinks. // Sinks are: // - points where the value in the register is being observed, such as an @@ -269,7 +263,7 @@ bool TypePromotion::isSink(Value *V) { /// Return whether this instruction can safely wrap. bool TypePromotion::isSafeWrap(Instruction *I) { - // We can support a, potentially, wrapping instruction (I) if: + // We can support a potentially wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. 
// - The wrapping value (I) is decreasing, i.e would underflow - wrapping @@ -356,7 +350,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) { if (!OverflowConst.isNonPositive()) return false; - // Using C1 = OverflowConst and C2 = ICmpConst, we can use either prove that: + // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that: // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2 // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2 if (OverflowConst.sgt(ICmpConst)) { @@ -404,7 +398,7 @@ static bool isPromotedResultSafe(Instruction *I) { } void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { - SmallVector<Instruction*, 4> Users; + SmallVector<Instruction *, 4> Users; Instruction *InstTo = dyn_cast<Instruction>(To); bool ReplacedAll = true; @@ -485,12 +479,18 @@ void IRPromoter::PromoteTree() { continue; if (auto *Const = dyn_cast<ConstantInt>(Op)) { - Constant *NewConst = SafeWrap.contains(I) + // For subtract, we don't need to sext the constant. We only put it in + // SafeWrap because SafeWrap.size() is used elsewhere. + // For cmp, we need to sign extend a constant appearing in either + // operand. For add, we should only sign extend the RHS. + Constant *NewConst = (SafeWrap.contains(I) && + (I->getOpcode() == Instruction::ICmp || i == 1) && + I->getOpcode() != Instruction::Sub) ? ConstantExpr::getSExt(Const, ExtTy) : ConstantExpr::getZExt(Const, ExtTy); I->setOperand(i, NewConst); } else if (isa<UndefValue>(Op)) - I->setOperand(i, UndefValue::get(ExtTy)); + I->setOperand(i, ConstantInt::get(ExtTy, 0)); } // Mutate the result type, unless this is an icmp or switch. @@ -506,7 +506,7 @@ void IRPromoter::TruncateSinks() { IRBuilder<> Builder{Ctx}; - auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { + auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * { if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType())) return nullptr; @@ -514,7 +514,7 @@ void IRPromoter::TruncateSinks() { return nullptr; LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " - << *V << "\n"); + << *V << "\n"); Builder.SetInsertPoint(cast<Instruction>(V)); auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy)); if (Trunc) @@ -550,6 +550,11 @@ void IRPromoter::TruncateSinks() { continue; } + // Don't insert a trunc for a zext which can still legally promote. + if (auto ZExt = dyn_cast<ZExtInst>(I)) + if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth) + continue; + // Now handle the others. for (unsigned i = 0; i < I->getNumOperands(); ++i) { Type *Ty = TruncTysMap[I][i]; @@ -576,16 +581,14 @@ void IRPromoter::Cleanup() { Value *Src = ZExt->getOperand(0); if (ZExt->getSrcTy() == ZExt->getDestTy()) { LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt - << "\n"); + << "\n"); ReplaceAllUsersOfWith(ZExt, Src); continue; } - // Unless they produce a value that is narrower than ExtTy, we can - // replace the result of the zext with the input of a newly inserted - // trunc. - if (NewInsts.count(Src) && isa<TruncInst>(Src) && - Src->getType() == OrigTy) { + // We've inserted a trunc for a zext sink, but we already know that the + // input is in range, negating the need for the trunc. 
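A worked check of the safe-wrap equivalence quoted in the comment above, using concrete i8 values promoted to i32; the numbers are chosen for illustration only:

#include "llvm/ADT/APInt.h"
#include <cassert>

// With C1 < 0 and C1 <=s C2, the narrow compare x + C1 <u C2 must agree
// with the promoted form zext(x) + sext(C1) <u sext(C2).
static void checkSafeWrapExample() {
  using llvm::APInt;
  APInt X(8, 1);                       // narrow operand
  APInt C1(8, -3, /*isSigned=*/true);  // decreasing increment, wraps under
  APInt C2(8, 10);                     // unsigned icmp constant
  bool Narrow = (X + C1).ult(C2);      // wraps: 254 <u 10 -> false
  bool Wide = (X.zext(32) + C1.sext(32)).ult(C2.sext(32));
  assert(Narrow == Wide && "promotion must preserve the compare");
  (void)Narrow;
  (void)Wide;
}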
+ if (NewInsts.count(Src) && isa<TruncInst>(Src)) { auto *Trunc = cast<TruncInst>(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); @@ -615,7 +618,7 @@ void IRPromoter::ConvertTruncs() { unsigned NumBits = DestTy->getScalarSizeInBits(); ConstantInt *Mask = - ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); + ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); if (auto *I = dyn_cast<Instruction>(Masked)) @@ -626,8 +629,8 @@ void IRPromoter::ConvertTruncs() { } void IRPromoter::Mutate() { - LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " - << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n"); + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to " + << PromotedWidth << "-bits\n"); // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -677,8 +680,7 @@ bool TypePromotion::isSupportedType(Value *V) { if (Ty->isVoidTy() || Ty->isPointerTy()) return true; - if (!isa<IntegerType>(Ty) || - cast<IntegerType>(Ty)->getBitWidth() == 1 || + if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 || cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth) return false; @@ -738,13 +740,12 @@ bool TypePromotion::isSupportedValue(Value *V) { /// smaller than the targeted promoted type. Check that we're not trying to /// promote something larger than our base 'TypeSize' type. bool TypePromotion::isLegalToPromote(Value *V) { - auto *I = dyn_cast<Instruction>(V); if (!I) return true; if (SafeToPromote.count(I)) - return true; + return true; if (isPromotedResultSafe(I) || isSafeWrap(I)) { SafeToPromote.insert(I); @@ -765,10 +766,10 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " << TypeSize << " bits to " << PromotedWidth << "\n"); - SetVector<Value*> WorkList; - SetVector<Value*> Sources; - SetVector<Instruction*> Sinks; - SetVector<Value*> CurrentVisited; + SetVector<Value *> WorkList; + SetVector<Value *> Sources; + SetVector<Instruction *> Sinks; + SetVector<Value *> CurrentVisited; WorkList.insert(V); // Return true if V was added to the worklist as a supported instruction, @@ -839,14 +840,15 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { } } - LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; - for (auto *I : CurrentVisited) - I->dump(); - ); + LLVM_DEBUG({ + dbgs() << "IR Promotion: Visited nodes:\n"; + for (auto *I : CurrentVisited) + I->dump(); + }); unsigned ToPromote = 0; unsigned NonFreeArgs = 0; - SmallPtrSet<BasicBlock*, 4> Blocks; + SmallPtrSet<BasicBlock *, 4> Blocks; for (auto *V : CurrentVisited) { if (auto *I = dyn_cast<Instruction>(V)) Blocks.insert(I->getParent()); @@ -860,16 +862,16 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { if (Sinks.count(cast<Instruction>(V))) continue; - ++ToPromote; - } + ++ToPromote; + } // DAG optimizations should be able to handle these cases better, especially // for function arguments. 
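The LLVM_DEBUG({...}) form adopted above treats a whole braced block as one debug-only statement, which keeps multi-line dumps tidy and compiles to nothing in release builds. Minimal usage sketch; the helper and DEBUG_TYPE string are mine:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "promo-example" // hypothetical -debug-only= tag

// Dump a set of visited values only when debugging is enabled.
static void dumpVisited(llvm::ArrayRef<llvm::Value *> Visited) {
  LLVM_DEBUG({
    llvm::dbgs() << "IR Promotion: Visited nodes:\n";
    for (llvm::Value *V : Visited)
      V->dump();
  });
}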
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) return false; - IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth, - CurrentVisited, Sources, Sinks, SafeWrap); + IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks, + SafeWrap); Promoter.Mutate(); return true; } @@ -893,14 +895,14 @@ bool TypePromotion::runOnFunction(Function &F) { const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); const TargetTransformInfo &TII = - getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); RegisterBitWidth = TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize(); Ctx = &F.getParent()->getContext(); // Search up from icmps to try to promote their operands. for (BasicBlock &BB : F) { - for (auto &I : BB) { + for (Instruction &I : BB) { if (AllVisited.count(&I)) continue; @@ -909,8 +911,7 @@ bool TypePromotion::runOnFunction(Function &F) { auto *ICmp = cast<ICmpInst>(&I); // Skip signed or pointer compares - if (ICmp->isSigned() || - !isa<IntegerType>(ICmp->getOperand(0)->getType())) + if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType())) continue; LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); @@ -921,13 +922,13 @@ bool TypePromotion::runOnFunction(Function &F) { if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) break; - if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != + if (TLI->getTypeAction(*Ctx, SrcVT) != TargetLowering::TypePromoteInteger) break; - EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); + EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT); if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) { LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " - << "for promoted type\n"); + << "for promoted type\n"); break; } @@ -936,13 +937,7 @@ bool TypePromotion::runOnFunction(Function &F) { } } } - LLVM_DEBUG(if (verifyFunction(F, &dbgs())) { - dbgs() << F; - report_fatal_error("Broken function after type promotion"); - }); } - if (MadeChange) - LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); AllVisited.clear(); SafeToPromote.clear(); @@ -956,6 +951,4 @@ INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) char TypePromotion::ID = 0; -FunctionPass *llvm::createTypePromotionPass() { - return new TypePromotion(); -} +FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 3426a03b6083..5e8514f525e9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,16 +26,10 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -131,8 +125,8 @@ bool 
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 5f59cb4643f2..8b5b585090f5 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -27,7 +27,6 @@
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -43,19 +42,18 @@ using namespace llvm;
 #define DEBUG_TYPE "machine-scheduler"
 
 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
-                                         cl::ZeroOrMore, cl::init(false));
+                                         cl::init(false));
 
 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
-                                       cl::ZeroOrMore, cl::init(true));
+                                       cl::init(true));
 
 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
-                                                cl::Hidden, cl::ZeroOrMore,
-                                                cl::init(1));
+                                                cl::Hidden, cl::init(1));
 
 // Check if the scheduler should penalize instructions that are available to
 // early due to a zero-latency dependence.
 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
-                                     cl::ZeroOrMore, cl::init(true));
+                                     cl::init(true));
 
 // This value is used to determine if a register class is a high pressure set.
 // We compute the maximum number of registers needed and divided by the total
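All four cl::opt changes in the VLIWMachineScheduler hunks drop cl::ZeroOrMore. The likely reason, stated here as an assumption about the upstream cleanup: ZeroOrMore became the default occurrences policy for cl::opt, so spelling it out is redundant. An equivalent modern declaration then needs only the visibility and the default value:

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Occurrences default to ZeroOrMore for cl::opt, so cl::Hidden and
// cl::init are all that remain; the flag name mirrors the hunk above.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
                                     cl::init(true));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return CheckEarlyAvail ? 0 : 1; // e.g. -check-early-avail=false flips this
}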
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
index 0c42bef82005..f577aff39ea7 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/ValueTypes.cpp
@@ -12,6 +12,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TypeSize.h"
+#include "llvm/Support/WithColor.h"
 using namespace llvm;
 
 EVT EVT::changeExtendedTypeToInteger() const {
@@ -179,19 +180,22 @@ std::string EVT::getEVTString() const {
 /// specified EVT. For integer types, this returns an unsigned type. Note
 /// that this will abort for types that cannot be represented.
 Type *EVT::getTypeForEVT(LLVMContext &Context) const {
+  // clang-format off
   switch (V.SimpleTy) {
   default:
     assert(isExtended() && "Type is not extended!");
     return LLVMTy;
   case MVT::isVoid:  return Type::getVoidTy(Context);
   case MVT::i1:      return Type::getInt1Ty(Context);
+  case MVT::i2:      return Type::getIntNTy(Context, 2);
+  case MVT::i4:      return Type::getIntNTy(Context, 4);
   case MVT::i8:      return Type::getInt8Ty(Context);
   case MVT::i16:     return Type::getInt16Ty(Context);
   case MVT::i32:     return Type::getInt32Ty(Context);
   case MVT::i64:     return Type::getInt64Ty(Context);
   case MVT::i128:    return IntegerType::get(Context, 128);
   case MVT::f16:     return Type::getHalfTy(Context);
-  case MVT::bf16: return Type::getBFloatTy(Context);
+  case MVT::bf16:    return Type::getBFloatTy(Context);
   case MVT::f32:     return Type::getFloatTy(Context);
   case MVT::f64:     return Type::getDoubleTy(Context);
   case MVT::f80:     return Type::getX86_FP80Ty(Context);
@@ -228,6 +232,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return FixedVectorType::get(Type::getInt1Ty(Context), 512);
   case MVT::v1024i1:
     return FixedVectorType::get(Type::getInt1Ty(Context), 1024);
+  case MVT::v128i2:
+    return FixedVectorType::get(Type::getIntNTy(Context, 2), 128);
+  case MVT::v64i4:
+    return FixedVectorType::get(Type::getIntNTy(Context, 4), 64);
   case MVT::v1i8:
     return FixedVectorType::get(Type::getInt8Ty(Context), 1);
   case MVT::v2i8:
@@ -500,6 +508,10 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return ScalableVectorType::get(Type::getBFloatTy(Context), 4);
   case MVT::nxv8bf16:
     return ScalableVectorType::get(Type::getBFloatTy(Context), 8);
+  case MVT::nxv16bf16:
+    return ScalableVectorType::get(Type::getBFloatTy(Context), 16);
+  case MVT::nxv32bf16:
+    return ScalableVectorType::get(Type::getBFloatTy(Context), 32);
   case MVT::nxv1f32:
     return ScalableVectorType::get(Type::getFloatTy(Context), 1);
   case MVT::nxv2f32:
@@ -520,6 +532,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return ScalableVectorType::get(Type::getDoubleTy(Context), 8);
   case MVT::Metadata: return Type::getMetadataTy(Context);
   }
+  // clang-format on
 }
 
 /// Return the value type corresponding to the specified type. This returns all
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c04a7b28eff9..aa6645227edb 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -77,8 +77,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/WasmEHFuncInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -212,9 +212,15 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
 
   assert(F.hasPersonalityFn() && "Personality function not found");
 
-  // __wasm_lpad_context global variable
+  // __wasm_lpad_context global variable.
+  // This variable should be thread local. If the target does not support TLS,
+  // we depend on CoalesceFeaturesAndStripAtomics to downgrade it to
+  // non-thread-local ones, in which case we don't allow this object to be
+  // linked with other objects using shared memory.
   LPadContextGV = cast<GlobalVariable>(
       M.getOrInsertGlobal("__wasm_lpad_context", LPadContextTy));
+  LPadContextGV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
+
   LPadIndexField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 0,
                                           "lpad_index_gep");
   LSDAField =
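The WasmEHPrepare hunk just above both documents and implements the TLS requirement: __wasm_lpad_context becomes a general-dynamic thread-local global. A minimal sketch of applying the same mode to a fresh global through the IR API; the module and variable names here are invented for the example:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("tls-demo", Ctx);
  Type *Int32Ty = Type::getInt32Ty(Ctx);
  // Create (or find) the global, then mark it thread-local the same way
  // prepareEHPads now marks __wasm_lpad_context.
  auto *GV =
      cast<GlobalVariable>(M.getOrInsertGlobal("demo_lpad_context", Int32Ty));
  GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel);
  GV->setInitializer(ConstantInt::get(Int32Ty, 0));
  // Prints roughly: @demo_lpad_context = thread_local global i32 0
  M.print(outs(), nullptr);
  return 0;
}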
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
index d31183e46d65..b835503ee9ed 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -19,14 +19,14 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -1256,4 +1256,4 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
   LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
 }
 
-WinEHFuncInfo::WinEHFuncInfo() {}
+WinEHFuncInfo::WinEHFuncInfo() = default;
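The final change, WinEHFuncInfo::WinEHFuncInfo() = default;, is behavior-preserving: the defaulted definition states intent more directly than an empty body and lets the compiler derive the implicit exception specification. One subtlety, shown below in a self-contained check: defaulting a constructor out-of-line still leaves it user-provided, so it does not make the type trivially constructible.

#include <type_traits>

struct Widget {
  Widget(); // declared here, defaulted out-of-line in the .cpp, as above
};

// Equivalent in effect to 'Widget::Widget() {}'.
Widget::Widget() = default;

int main() {
  // Out-of-line '= default' keeps the constructor user-provided:
  static_assert(!std::is_trivially_default_constructible<Widget>::value,
                "still user-provided, hence not trivial");
  Widget W; // default-constructs exactly as before the change
  (void)W;
  return 0;
}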